# Movie Recommendation system

In [None]:
import numpy as np
import pandas as pd

# Load the TMDB 5000 movies and credits tables into DataFrames.
# NOTE(review): the '/content/' paths look like a Colab workspace — confirm
# the location before running this notebook elsewhere.
moviesData = pd.read_csv('/content/tmdb_5000_movies.csv')
creditsData = pd.read_csv('/content/tmdb_5000_credits.csv')
# Preview the first row of the movies table.
moviesData.head(1)

In [None]:
# Join the credits table onto the movies table, matching rows by title.
# The credits frame was loaded as `creditsData`; the name `credits` is never
# assigned in this notebook (it resolves to the interpreter's site builtin,
# which is not a DataFrame), so it cannot be used as the merge partner.
moviesData = moviesData.merge(creditsData, on="title")
moviesData.head(1)

In [None]:
moviesData.head(1)

In [36]:
# Keep only the columns that are used to build the recommendation tags:
# movie_id
# title
# genres
# keywords
# overview
# cast
# crew

moviesData = moviesData[['movie_id','title','genres','keywords','overview','cast','crew']]

In [None]:
moviesData.head(1)

In [38]:
import ast

def convert(text):
  """Return the 'name' value of every entry in a stringified list of dicts.

  `text` is parsed with `ast.literal_eval`; each parsed element is indexed
  with the key 'name' and the values are returned in their original order.
  """
  return [entry['name'] for entry in ast.literal_eval(text)]


In [39]:
# Drop every row that has a missing value in any of the selected columns.
# Reassigning (instead of inplace=True) avoids mutating a frame that was itself
# produced by a column selection, and resetting the index keeps row labels
# equal to row positions, so later lookups into position-indexed arrays built
# from this frame stay aligned even if rows were removed here.
moviesData = moviesData.dropna().reset_index(drop=True)


In [42]:
moviesData['genres'] = moviesData['genres'].apply(convert)

In [None]:
moviesData.head(1)

In [None]:
moviesData['keywords'] = moviesData['keywords'].apply(convert)
moviesData.head(1)

In [46]:
def cast_convert(text, limit=3):
  """Return the names of the first `limit` entries of a stringified cast list.

  Args:
    text: String parsed with `ast.literal_eval` into a sequence of dicts;
      the entries that are kept must have a 'name' key.
    limit: Maximum number of names to keep; defaults to 3.

  Returns:
    A list with at most `limit` names, in their original order.
  """
  names = []
  for member in ast.literal_eval(text):
    # Stop as soon as enough names are collected instead of walking the
    # remainder of the cast list.
    if len(names) >= limit:
      break
    names.append(member['name'])
  return names

In [None]:
moviesData['cast'] = moviesData['cast'].apply(cast_convert)
moviesData.head(1)

In [48]:
# Keep at most the first three cast names for each movie.
moviesData['cast'] = moviesData['cast'].apply(lambda names: names[:3])

In [49]:
def fectch_director(text):
  """Return the names of all crew entries whose 'job' is 'Director'.

  `text` is parsed with `ast.literal_eval`; matching entries are kept in
  their original order, and the result is empty when no entry matches.
  """
  return [
    member['name']
    for member in ast.literal_eval(text)
    if member['job'] == 'Director'
  ]



In [50]:
moviesData['crew'] = moviesData['crew'].apply(fectch_director)

In [None]:
moviesData.head(1)

In [None]:
moviesData['overview'] = moviesData['overview'].apply(lambda x:x.split())
moviesData.sample(5)

In [54]:
# Spaces are removed from each entry to prevent confusion for the model.
def remove_space(L):
  """Return a new list with every " " removed from each string in `L`.

  The input list itself is left unmodified.
  """
  return [item.replace(" ", "") for item in L]


In [55]:
# Strip spaces from every entry of the list-valued columns, one column at a time.
for column in ('cast', 'crew', 'genres', 'keywords'):
  moviesData[column] = moviesData[column].apply(remove_space)

In [None]:
moviesData.head(1)

In [58]:
moviesData['tags'] = moviesData['overview'] + moviesData['genres'] + moviesData['keywords'] + moviesData['cast'] + moviesData['crew']


In [60]:
# Keep only the columns needed from here on. The explicit copy makes new_df an
# independent frame, so the later assignments to new_df['tags'] modify it
# directly rather than a selection of moviesData.
new_df = moviesData[['movie_id','title','tags']].copy()

In [63]:
new_df['tags'] = new_df['tags'].apply(lambda x:" ".join(x))

In [66]:
new_df['tags'] = new_df['tags'].apply(lambda x:x.lower())

In [None]:
new_df.head(1)

In [68]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=5000,stop_words='english')


In [69]:
vector = cv.fit_transform(new_df['tags']).toarray()

In [70]:
vector.shape

(4806, 5000)

In [71]:
from sklearn.metrics.pairwise import cosine_similarity

In [72]:
similarity = cosine_similarity(vector)

In [74]:
new_df[new_df['title'] == 'The Lego Movie'].index[0]

np.int64(744)

In [75]:
def recommend(movie, top_n=5):
    """Print the titles of the movies most similar to `movie`.

    Args:
        movie: Title to look up in the 'title' column of `new_df`.
        top_n: Number of recommendations to print; defaults to 5.

    Prints a notice and returns without recommendations when the title is
    not present in `new_df`.
    """
    # `similarity` is indexed by row position, so locate the movie by position;
    # the frame's index labels are not guaranteed to equal positions (for
    # example after rows have been dropped).
    title_mask = (new_df['title'] == movie).to_numpy()
    if not title_mask.any():
        print(f"Movie '{movie}' was not found in the dataset.")
        return
    position = int(title_mask.argmax())  # position of the first matching row

    ranked = sorted(
        enumerate(similarity[position]),
        key=lambda pair: pair[1],
        reverse=True,
    )
    # Exclude the queried movie explicitly rather than assuming it sorts first.
    neighbours = [row for row, _ in ranked if row != position][:top_n]
    for row in neighbours:
        print(new_df.iloc[row].title)


In [79]:
recommend('Titanic')

Captain Phillips
The Notebook
In the Heart of the Sea
Ghost Ship
Poseidon


In [81]:
import pickle

# Persist the movie table as a plain dict. The context manager closes the file
# once the dump finishes, even if serialization raises.
with open('movie_dict.pkl', 'wb') as movie_file:
    pickle.dump(new_df.to_dict(), movie_file)

In [82]:
# Persist the similarity matrix. The context manager closes the file once the
# dump finishes, even if serialization raises.
with open('similarity.pkl', 'wb') as similarity_file:
    pickle.dump(similarity, similarity_file)
