In [2]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [4]:
# You can get the dataset at - https://www.kaggle.com/tmdb/tmdb-movie-metadata

df1=pd.read_csv('tmdb_5000_credits.csv')
df2=pd.read_csv('tmdb_5000_movies.csv')
df = df2.merge(df1)
df.head()

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,movie_id,cast,crew
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...",...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,19995,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...",...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,285,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,245000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.sonypictures.com/movies/spectre/,206647,"[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...",en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...",...,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,206647,"[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."
3,250000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...",http://www.thedarkknightrises.com/,49026,"[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...",en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,"[{""name"": ""Legendary Pictures"", ""id"": 923}, {""...",...,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,49026,"[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de..."
4,260000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://movies.disney.com/john-carter,49529,"[{""id"": 818, ""name"": ""based on novel""}, {""id"":...",en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}]",...,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,49529,"[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."


In [5]:
df['overview'][0:10]

0    In the 22nd century, a paraplegic Marine is di...
1    Captain Barbossa, long believed to be dead, ha...
2    A cryptic message from Bond’s past sends him o...
3    Following the death of District Attorney Harve...
4    John Carter is a war-weary, former military ca...
5    The seemingly invincible Spider-Man goes up ag...
6    When the kingdom's most wanted-and most charmi...
7    When Tony Stark tries to jumpstart a dormant p...
8    As Harry begins his sixth year at Hogwarts, he...
9    Fearing the actions of a god-like Super Hero l...
Name: overview, dtype: object

In [6]:
# creating tf-idf vectorizer object

tf = TfidfVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0, stop_words='english')

df['overview'] = df['overview'].fillna('')

tfidf_matrix = tf.fit_transform(df['overview'])

tfidf_matrix.shape

(4809, 136949)

In [8]:
# calculating cosine similarity between the documents

cos = linear_kernel(tfidf_matrix, tfidf_matrix)
# cos

In [9]:
indices = pd.Series(df2.index, index=df2['title']).drop_duplicates()   # map for indices to movies

In [10]:
def get_recommendations(title, cosine_sim = cos):
    idx = indices[title]

    similarity_scores = list(enumerate(cosine_sim[idx]))
#     print(sim_scores)
    similarity_scores = sorted(similarity_scores, key = lambda x: x[1], reverse = True)

    similarity_scores = similarity_scores[1:21]    # ignore first because it is the same movie

    movie_indices = [i[0] for i in similarity_scores]
    
    movies = [x for x in df['title'].iloc[movie_indices]]
    
    return movies

In [11]:
get_recommendations('The Dark Knight')

['The Dark Knight Rises',
 'Batman Forever',
 'Batman Returns',
 'Batman',
 'Batman',
 'Batman: The Dark Knight Returns, Part 2',
 'Slow Burn',
 'JFK',
 'Batman Begins',
 'Sherlock Holmes: A Game of Shadows',
 'Law Abiding Citizen',
 'Secret in Their Eyes',
 'Batman v Superman: Dawn of Justice',
 'Batman & Robin',
 'The Rookie',
 'Rush Hour 3',
 '12 Rounds',
 "Gangster's Paradise: Jerusalema",
 'Despicable Me 2',
 'Legal Eagles']

In [12]:
get_recommendations('Avatar')

['The Matrix',
 'Apollo 18',
 'The Inhabited Island',
 'Tears of the Sun',
 'The American',
 'Supernova',
 'Semi-Pro',
 'The Adventures of Pluto Nash',
 'Hanna',
 'The Book of Life',
 'Aliens vs Predator: Requiem',
 'Blood and Chocolate',
 'Insomnia',
 'E.T. the Extra-Terrestrial',
 'Birthday Girl',
 'Transformers: Age of Extinction',
 'Ransom',
 'Just Visiting',
 'Falcon Rising',
 'Two Lovers']

In [13]:
get_recommendations('Spectre')

['Never Say Never Again',
 'Thunderball',
 'From Russia with Love',
 'Quantum of Solace',
 'Hackers',
 'The Man with the Golden Gun',
 'The Living Daylights',
 'Safe Haven',
 'Restless',
 'Dr. No',
 'Skyfall',
 'Diamonds Are Forever',
 'In Her Line of Fire',
 'The Interpreter',
 'Dance Flick',
 'Licence to Kill',
 'Watchmen',
 'Octopussy',
 'The Sentinel',
 'Romeo Must Die']