<a href="https://colab.research.google.com/github/animesharma3/Movie-Recommendation/blob/main/Nearest_Neighbors_Based_Recommender_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Importing Necessary Libraries

In [139]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.neighbors import NearestNeighbors

import pickle

## Loading Dataset

In [140]:
# Movie metadata CSV hosted in the project's GitHub repository.
url = 'https://raw.githubusercontent.com/animesharma3/Movie-Recommendation/main/movies_dataset.csv'
df = pd.read_csv(filepath_or_buffer=url)
# Preview the first five rows.
df.head(n=5)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,budget,genres,id,imdb_id,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,0,15480,160000000,"[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...",27205,tt1375666,en,Inception,"Cobb, a skilled thief who commits corporate es...",29.108149,"[{'name': 'Legendary Pictures', 'id': 923}, {'...","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'...",2010-07-14,825532800.0,148.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Your mind is the scene of the crime.,Inception,8.1,14075.0
1,1,12481,185000000,"[{'id': 18, 'name': 'Drama'}, {'id': 28, 'name...",155,tt0468569,en,The Dark Knight,Batman raises the stakes in his war on crime. ...,123.167259,"[{'name': 'DC Comics', 'id': 429}, {'name': 'L...","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'...",2008-07-16,1004558000.0,152.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Why So Serious?,The Dark Knight,8.3,12269.0
2,2,14551,237000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",19995,tt0499549,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",185.070892,"[{'name': 'Ingenious Film Partners', 'id': 289...","[{'iso_3166_1': 'US', 'name': 'United States o...",2009-12-10,2787965000.0,162.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Enter the World of Pandora.,Avatar,7.2,12114.0
3,3,17818,220000000,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...",24428,tt0848228,en,The Avengers,When an unexpected enemy emerges and threatens...,89.887648,"[{'name': 'Paramount Pictures', 'id': 4}, {'na...","[{'iso_3166_1': 'US', 'name': 'United States o...",2012-04-25,1519558000.0,143.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Some assembly required.,The Avengers,7.4,12000.0
4,4,26564,58000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",293660,tt1431045,en,Deadpool,Deadpool tells the origin story of former Spec...,187.860492,[{'name': 'Twentieth Century Fox Film Corporat...,"[{'iso_3166_1': 'US', 'name': 'United States o...",2016-02-09,783113000.0,108.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Witness the beginning of a happy ending,Deadpool,7.4,11444.0


## Data Preprocessing

### Removing Unnecessary Features

In [141]:
# Remove the leftover "Unnamed: *" index columns that were written into the CSV.
# Selecting them by name (instead of "the first two columns") keeps this cell
# idempotent: re-running it no longer deletes real features such as `budget`
# and `genres`.
unnamed_cols = [col for col in df.columns if str(col).startswith('Unnamed')]
df.drop(columns=unnamed_cols, inplace=True)

In [142]:
# List the columns that remain after the drop above.
df.columns

Index(['budget', 'genres', 'id', 'imdb_id', 'original_language',
       'original_title', 'overview', 'popularity', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'vote_average',
       'vote_count'],
      dtype='object')

In [143]:
# Columns used to describe each movie to the nearest-neighbour model.
features = [
    'budget',
    'popularity',
    'revenue',
    'runtime',
    'status',
    'vote_average',
    'vote_count',
]
# Work on an independent copy so later preprocessing never touches `df`.
final_df = df.loc[:, features].copy()

### Missing Values

In [144]:
# Count missing values per selected feature.
final_df.isna().sum()

budget           0
popularity       0
revenue          0
runtime         28
status          12
vote_average     0
vote_count       0
dtype: int64

In [145]:
# Discard every row that has at least one missing feature value.
final_df = final_df.dropna(axis=0, how='any')

In [146]:
# Re-count missing values per feature after the rows with NaNs were dropped.
final_df.isna().sum()

budget          0
popularity      0
revenue         0
runtime         0
status          0
vote_average    0
vote_count      0
dtype: int64

### Categorical Encoding: One-Hot Encoding

In [147]:
# One-hot encode the categorical `status` column (the only non-numeric feature).
# Float dummies are requested explicitly: pandas >= 2.0 emits bool dummy columns
# by default, which would turn `final_df.values` into an object-dtype matrix
# instead of the plain float matrix the nearest-neighbour model is fitted on.
final_df = pd.get_dummies(final_df, dtype=float)

In [148]:
# Show the feature columns after one-hot encoding.
final_df.columns

Index(['budget', 'popularity', 'revenue', 'runtime', 'vote_average',
       'vote_count', 'status_In Production', 'status_Planned',
       'status_Post Production', 'status_Released', 'status_Rumored'],
      dtype='object')

## Model Building

In [149]:
# Brute-force cosine-similarity index that returns 20 neighbours per query.
# NOTE(review): the features are fed in unscaled, so the large-magnitude
# columns (budget, revenue) presumably dominate the cosine distance - consider
# standardising the features before fitting; confirm against results.
neigh = NearestNeighbors(n_neighbors=20, algorithm='brute', metric='cosine')
neigh.fit(final_df.to_numpy())

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
                 metric_params=None, n_jobs=None, n_neighbors=20, p=2,
                 radius=1.0)

In [150]:
# Use the first encoded movie as a query, shaped as a single-row 2-D array
# because kneighbors expects a matrix of samples.
test = final_df.iloc[0].to_numpy().reshape(1, -1)
print(test)

[[1.60000000e+08 2.91081490e+01 8.25532764e+08 1.48000000e+02
  8.10000000e+00 1.40750000e+04 0.00000000e+00 0.00000000e+00
  0.00000000e+00 1.00000000e+00 0.00000000e+00]]


## Recommendation

In [151]:
# Ask the fitted index for the nearest rows; only their positions are needed,
# so the distances are not requested.
ind = neigh.kneighbors(X=test, return_distance=False)
ind

array([[    0,   558, 22648,  5546, 15667,   153,  3081,  3587,  2006,
         1973,  4703,   147,   117,  8126,   937,   615, 10686, 13153,
          268,   323]])

In [152]:
# kneighbors returns row *positions* within the matrix the model was fitted on
# (final_df.values). Rows with missing values were dropped from final_df, so
# those positions no longer line up with positions in df; translate them to
# index labels via final_df.index before looking up titles.
# The first neighbour is skipped because it is expected to be the query itself.
df.loc[final_df.index[ind[0][1:]], 'title']

558      Madagascar 3: Europe's Most Wanted
22648     The Romance of Astrea and Celadon
5546                            Whale Rider
15667                    The Storm Warriors
153                            Finding Dory
3081                           Urban Legend
3587                           The Enforcer
2006                                    JFK
1973                Spy Kids 3-D: Game Over
4703                          Shin Godzilla
147          Rise of the Planet of the Apes
117                     Alice in Wonderland
8126           Iron Man: Rise of Technovore
937                              The Smurfs
615                                The Ring
10686                      The Intervention
13153             SAGA: Curse of the Shadow
268         Transformers: Age of Extinction
323                               The Mummy
Name: title, dtype: object

## Deployment

In [153]:
# Persist the fitted model. The context manager guarantees the file is flushed
# and closed, so the pickle is complete on disk before it is read back.
with open('nneighbors_model', 'wb') as model_file:
    pickle.dump(neigh, model_file)

In [154]:
# Reload the persisted model, closing the file handle once it has been read.
# NOTE: unpickling can execute arbitrary code - only load files you created.
with open('nneighbors_model', 'rb') as model_file:
    model = pickle.load(model_file)

In [155]:
def recommend(movie_data):
    """Return the titles of the movies most similar to one encoded movie.

    Parameters
    ----------
    movie_data : pandas.Series or array-like
        A single movie's feature vector, laid out like one row of the
        notebook-level ``final_df`` (the matrix the model was fitted on).

    Returns
    -------
    pandas.Series
        Titles from the notebook-level ``df`` for the returned neighbours,
        excluding the first one (expected to be the query movie itself).

    Notes
    -----
    Relies on the notebook-level ``model``, ``df`` and ``final_df``.
    """
    # Force a plain float row vector; a mixed-dtype row (e.g. bool dummies next
    # to floats) would otherwise arrive as an object array.
    data = np.asarray(movie_data, dtype=float).reshape(1, -1)
    ind = model.kneighbors(data, return_distance=False)
    # kneighbors yields positions within the fitted matrix (final_df.values).
    # Rows with missing values were removed from final_df, so positions must be
    # mapped back to df's index labels; df.iloc would pick the wrong movies.
    neighbor_labels = final_df.index[ind[0][1:]]
    recommendations = df.loc[neighbor_labels, 'title']
    return recommendations

In [156]:
# Try the helper on the second row of the encoded feature table.
recommend(final_df.iloc[1])

20463                         All This, and Heaven Too
7994                                           Top Hat
10469                      A Thousand Times Good Night
4835                                       Next Friday
8507                             The End of the Affair
1114                                               Elf
2482                                      Heartbreaker
1034                                             Babel
173       Star Wars: Episode II - Attack of the Clones
4919                                           Quartet
20349                 Ali Zaoua: Prince of the Streets
1366                                          Movie 43
2471                                    Cool Hand Luke
2791     Pokémon: The First Movie: Mewtwo Strikes Back
222                           The Fast and the Furious
12390                                            Julia
468                                           Predator
1118                                    The Ugly Truth
1074      

In [174]:
def recommend1(title, df):
    """Print the titles of the movies most similar to the movie named ``title``.

    Parameters
    ----------
    title : str
        Exact title of the query movie, as stored in ``df['title']``. If several
        rows share the title, the first one is used as the query.
    df : pandas.DataFrame
        Movie table containing the raw feature columns plus ``title``. It is
        assumed to be the same table the notebook-level ``model`` was fitted on.

    Raises
    ------
    ValueError
        If no row with complete feature data carries the requested title.

    Notes
    -----
    Prints the neighbour titles (query movie included) and returns ``None``.
    Relies on the notebook-level ``model``.
    """
    raw_features = ['budget', 'popularity', 'revenue', 'runtime', 'status', 'vote_average', 'vote_count']
    # Column layout of the matrix the model was fitted on.
    model_columns = [
        'budget', 'popularity', 'revenue', 'runtime', 'vote_average', 'vote_count',
        'status_In Production', 'status_Planned', 'status_Post Production',
        'status_Released', 'status_Rumored',
    ]

    # The model was fitted on the rows that have no missing feature values;
    # kneighbors returns positions within that subset, not positions in df.
    train_rows = df[raw_features].dropna()

    candidates = df[raw_features + ['title']].dropna()
    matches = candidates[candidates['title'] == title]
    if matches.empty:
        raise ValueError(
            'No movie titled {!r} with complete feature data was found.'.format(title)
        )

    # Encode only the query row, then align it to the model's column layout;
    # status dummies that do not occur in this row are filled with 0.
    encoded = pd.get_dummies(matches.iloc[[0]], columns=['status'], dtype=float)
    data = np.asarray(encoded.reindex(columns=model_columns, fill_value=0.0), dtype=float)

    ind = model.kneighbors(data, return_distance=False)
    # Translate fitted-matrix positions back to df's index labels.
    print(df.loc[train_rows.index[ind[0]], 'title'])
    # print(ind)

In [175]:
# Use the title stored in the first row of the dataset as the query.
title = df['title'].iat[0]
recommend1(title, df)

0                                 Inception
558      Madagascar 3: Europe's Most Wanted
22648     The Romance of Astrea and Celadon
5546                            Whale Rider
15667                    The Storm Warriors
153                            Finding Dory
3081                           Urban Legend
3587                           The Enforcer
2006                                    JFK
1973                Spy Kids 3-D: Game Over
4703                          Shin Godzilla
147          Rise of the Planet of the Apes
117                     Alice in Wonderland
8126           Iron Man: Rise of Technovore
937                              The Smurfs
615                                The Ring
10686                      The Intervention
13153             SAGA: Curse of the Shadow
268         Transformers: Age of Extinction
323                               The Mummy
Name: title, dtype: object
