# Importing dependencies

In [1]:
import numpy as np
import pandas as pd
import difflib # To find the initial closest match of user input movie from the dataset
from sklearn.feature_extraction.text import TfidfVectorizer # To convert textual vector to numerical vector
from sklearn.metrics.pairwise import cosine_similarity # To use cosine similarity

# Data Collection and preprocessing


In [2]:
# Load the two TMDB CSV files. The movies table's "id" column is renamed to
# "movie_id" straight away so both tables share the same join key name.
df_movies = pd.read_csv("../Movie_recommendation_system/tmdb_5000_movies.csv").rename(
    columns={"id": "movie_id"}
)
df_credits = pd.read_csv("../Movie_recommendation_system/tmdb_5000_credits.csv")

# Join movies with their credits on "movie_id", then discard the two suffixed
# title columns ("title_x" / "title_y") left over from the merge;
# "original_title" is the title column used from here on.
movie_df = df_movies.merge(df_credits, on="movie_id").drop(
    columns=["title_x", "title_y"]
)

movie_df


Unnamed: 0,budget,genres,homepage,movie_id,keywords,original_language,original_title,overview,popularity,production_companies,...,release_date,revenue,runtime,spoken_languages,status,tagline,vote_average,vote_count,cast,crew
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...",...,2009-12-10,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,7.2,11800,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...",...,2007-05-19,961000000,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",6.9,4500,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,245000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.sonypictures.com/movies/spectre/,206647,"[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...",en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...",...,2015-10-26,880674609,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,6.3,4466,"[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."
3,250000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...",http://www.thedarkknightrises.com/,49026,"[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...",en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.312950,"[{""name"": ""Legendary Pictures"", ""id"": 923}, {""...",...,2012-07-16,1084939099,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,7.6,9106,"[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de..."
4,260000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://movies.disney.com/john-carter,49529,"[{""id"": 818, ""name"": ""based on novel""}, {""id"":...",en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}]",...,2012-03-07,284139100,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",6.1,2124,"[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4798,220000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...",,9367,"[{""id"": 5616, ""name"": ""united states\u2013mexi...",es,El Mariachi,El Mariachi just wants to play his guitar and ...,14.269792,"[{""name"": ""Columbia Pictures"", ""id"": 5}]",...,1992-09-04,2040920,81.0,"[{""iso_639_1"": ""es"", ""name"": ""Espa\u00f1ol""}]",Released,"He didn't come looking for trouble, but troubl...",6.6,238,"[{""cast_id"": 1, ""character"": ""El Mariachi"", ""c...","[{""credit_id"": ""52fe44eec3a36847f80b280b"", ""de..."
4799,9000,"[{""id"": 35, ""name"": ""Comedy""}, {""id"": 10749, ""...",,72766,[],en,Newlyweds,A newlywed couple's honeymoon is upended by th...,0.642552,[],...,2011-12-26,0,85.0,[],Released,A newlywed couple's honeymoon is upended by th...,5.9,5,"[{""cast_id"": 1, ""character"": ""Buzzy"", ""credit_...","[{""credit_id"": ""52fe487dc3a368484e0fb013"", ""de..."
4800,0,"[{""id"": 35, ""name"": ""Comedy""}, {""id"": 18, ""nam...",http://www.hallmarkchannel.com/signedsealeddel...,231617,"[{""id"": 248, ""name"": ""date""}, {""id"": 699, ""nam...",en,"Signed, Sealed, Delivered","""Signed, Sealed, Delivered"" introduces a dedic...",1.444476,"[{""name"": ""Front Street Pictures"", ""id"": 3958}...",...,2013-10-13,0,120.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,,7.0,6,"[{""cast_id"": 8, ""character"": ""Oliver O\u2019To...","[{""credit_id"": ""52fe4df3c3a36847f8275ecf"", ""de..."
4801,0,[],http://shanghaicalling.com/,126186,[],en,Shanghai Calling,When ambitious New York attorney Sam is sent t...,0.857008,[],...,2012-05-03,0,98.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,A New Yorker in Shanghai,5.7,7,"[{""cast_id"": 3, ""character"": ""Sam"", ""credit_id...","[{""credit_id"": ""52fe4ad9c3a368484e16a36b"", ""de..."


In [3]:
# NOTE(review): this list is not referenced by the cells visible here, and
# 'director' is not one of the columns shown in the merged frame — confirm
# whether a later cell relies on it before changing it.
relevant_features = ['genres','keywords','tagline','cast','director']


# Replace missing values with an empty string in the non-numeric columns only.
# Writing '' into a numeric column would silently turn it into a mixed-type
# object column, so numeric columns keep their NaNs and their dtype.
text_columns = movie_df.select_dtypes(exclude="number").columns
movie_df[text_columns] = movie_df[text_columns].fillna('')


In [4]:
# Build one text document per movie by joining the chosen columns, in this
# order, with a single space between consecutive columns.
feature_columns = ["genres", "keywords", "overview", "tagline", "cast", "production_companies"]

combined_features = movie_df[feature_columns[0]]
for extra_column in feature_columns[1:]:
    combined_features = combined_features + " " + movie_df[extra_column]



In [5]:
# Converting the textual data to numerical data
# A TF-IDF vectorizer with default settings; it is only constructed here and
# is fitted on the combined text in the next cell.
vectorizer = TfidfVectorizer()

In [6]:
# Fit the vectorizer on the combined text of every movie and transform that
# same text into TF-IDF vectors in a single step (one vector per movie).
feature_numerical_vectors = vectorizer.fit_transform(combined_features)


# Applying cosine similarity

In [7]:
# Getting the similarity scores using cosine similarity
# Called with a single argument, so every movie vector is compared against
# every other one; later cells read row `i` as the scores of movie `i`
# against all movies.
similarity = cosine_similarity(feature_numerical_vectors)


In [9]:
# Ask the user for a movie title. It does not have to be exact: a later cell
# fuzzy-matches it against the titles in the dataset with difflib.
favt_movie_name = input("Enter your favorite movie name:")

Enter your favorite movie name:The dark knight


In [10]:
# Collect every value of the "original_title" column into a plain Python list
# so it can be used as the candidate pool for fuzzy matching
list_of_all_movies = movie_df.loc[:, "original_title"].to_list()


In [11]:
# Finding closest match for the movie name given by the user
find_match_movies = difflib.get_close_matches(favt_movie_name,list_of_all_movies)

# get_close_matches returns an empty list when no title is similar enough;
# fail with a clear message instead of an opaque IndexError on [0].
if not find_match_movies:
    raise ValueError(
        f"No movie title close to {favt_movie_name!r} was found in the dataset"
    )

# Matches are ordered best-first, so the first entry is the closest title
closest_match_movie = find_match_movies[0]
closest_match_movie

'The Dark Knight'

In [12]:
# Look up the index label of the first row whose "original_title" equals the
# matched title (if several rows share the title, the first one is used)
index_of_favt_movie = movie_df.loc[
    movie_df["original_title"] == closest_match_movie
].index[0]
index_of_favt_movie

65

In [13]:
# Take the similarity row that belongs to the chosen movie and pair every
# score with its position, giving a list of (index, similarity score) tuples
similarity_score_of_favt_movie = [
    (position, score)
    for position, score in enumerate(similarity[index_of_favt_movie])
]



In [14]:
# Order the (index, score) pairs from highest to lowest score so the most
# similar movies come first; the unsorted list is left untouched
sorted_similar_movies = list(similarity_score_of_favt_movie)
sorted_similar_movies.sort(key=lambda pair: pair[1], reverse=True)


# Retrieving the movies most similar to the given movie

In [15]:
# Print the names of the most similar movies together with their scores
print("Similar movies recommended for you!!\n\n")

# Number of recommendations to show
top_n = 15

# Exclude the chosen movie by its row index rather than by its title:
# comparing titles would also drop any other movie that shares that title.
recommendations = [
    (index, similarity_num)
    for index, similarity_num in sorted_similar_movies
    if index != index_of_favt_movie
][:top_n]

for index, similarity_num in recommendations:
    # `index` is a position in the similarity row, hence the positional lookup
    recommended_movie = movie_df["original_title"].iloc[index]
    print(recommended_movie,similarity_num)

Similar movies recommended for you!!


The Dark Knight Rises 0.7428899434846056
American Gangster 0.7403957715940724
Spider-Man 3 0.729440031728997
The Life of David Gale 0.7257238624001825
Batman v Superman: Dawn of Justice 0.7206832420838649
Inherent Vice 0.7120768970580503
Django Unchained 0.7099627374198296
GoodFellas 0.7068113127335884
Gone Girl 0.7052289414861093
The Avengers 0.7018465420151908
15 Minutes 0.7016544600686186
Batman Begins 0.7009026288916638
Suicide Squad 0.7004651051039957
Hot Pursuit 0.6999328030553839
The Hangover 0.6998411983775945


# Designing the GUI part