In [1]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# Load the movie table from 'movies.csv' (path is relative to the working directory).
movies_data = pd.read_csv('movies.csv')

In [4]:
# Size of the loaded frame as (rows, columns).
movies_data.shape

(4803, 24)

In [5]:
# Preview the first rows to see which columns are available.
movies_data.head()

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,...,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,...,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,...,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton


In [6]:
# Summary statistics (count, mean, std, quartiles) for the numeric columns.
movies_data.describe()

Unnamed: 0,index,budget,id,popularity,revenue,runtime,vote_average,vote_count
count,4803.0,4803.0,4803.0,4803.0,4803.0,4801.0,4803.0,4803.0
mean,2401.0,29045040.0,57165.484281,21.492301,82260640.0,106.875859,6.092172,690.217989
std,1386.651002,40722390.0,88694.614033,31.81665,162857100.0,22.611935,1.194612,1234.585891
min,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0
25%,1200.5,790000.0,9014.5,4.66807,0.0,94.0,5.6,54.0
50%,2401.0,15000000.0,14629.0,12.921594,19170000.0,103.0,6.2,235.0
75%,3601.5,40000000.0,58610.5,28.313505,92917190.0,118.0,6.8,737.0
max,4802.0,380000000.0,459488.0,875.581305,2787965000.0,338.0,10.0,13752.0


In [8]:
# Text columns used to describe each movie; their missing values are blanked in
# the next step before the columns are joined into one string per movie.
selected_features = ['genres','keywords','tagline','cast','director']

In [9]:
# Replace missing values with empty strings in all selected text columns at
# once, so the later string concatenation never encounters NaN.
movies_data[selected_features] = movies_data[selected_features].fillna('')

In [10]:
# Build one space-separated text document per movie from the columns listed in
# selected_features. Driving this from the list (instead of repeating the five
# column names) keeps the cleaned columns and the combined columns in sync.
# Every value must already be a string, i.e. missing values were filled with ''.
combined_features = movies_data[selected_features].agg(' '.join, axis=1)

In [11]:
# Inspect the combined text: one string per movie.
combined_features 

0       Action Adventure Fantasy Science Fiction cultu...
1       Adventure Fantasy Action ocean drug abuse exot...
2       Action Adventure Crime spy based on novel secr...
3       Action Crime Drama Thriller dc comics crime fi...
4       Action Adventure Science Fiction based on nove...
                              ...                        
4798    Action Crime Thriller united states\u2013mexic...
4799    Comedy Romance  A newlywed couple's honeymoon ...
4800    Comedy Drama Romance TV Movie date love at fir...
4801      A New Yorker in Shanghai Daniel Henney Eliza...
4802    Documentary obsession camcorder crush dream gi...
Length: 4803, dtype: object

In [12]:
# Learn a TF-IDF vocabulary from the combined text and encode every movie as
# one row of a sparse matrix (rows follow the order of combined_features).
vectorizer = TfidfVectorizer()
feature_vectors = vectorizer.fit_transform(combined_features)

In [13]:
# Show the shape and sparsity of the TF-IDF matrix.
feature_vectors

<4803x15858 sparse matrix of type '<class 'numpy.float64'>'
	with 124263 stored elements in Compressed Sparse Row format>

In [14]:
# Cosine similarity between every pair of movie vectors: similarity[i, j] scores
# the movie in row i against the movie in row j.
similarity = cosine_similarity(feature_vectors)

In [15]:
# Inspect the similarity matrix (the diagonal is each movie against itself).
similarity

array([[1.        , 0.0775839 , 0.01745812, ..., 0.00221788, 0.00247375,
        0.00253428],
       [0.0775839 , 1.        , 0.03710254, ..., 0.03978988, 0.00222238,
        0.00227676],
       [0.01745812, 0.03710254, 1.        , ..., 0.00231541, 0.03516011,
        0.00264573],
       ...,
       [0.00221788, 0.03978988, 0.00231541, ..., 1.        , 0.00255461,
        0.03800792],
       [0.00247375, 0.00222238, 0.03516011, ..., 0.00255461, 1.        ,
        0.00291906],
       [0.00253428, 0.00227676, 0.00264573, ..., 0.03800792, 0.00291906,
        1.        ]])

In [16]:
# The matrix is square: one row and one column per movie.
similarity.shape

(4803, 4803)

In [18]:
# Ask the user for a movie title; it does not need to be exact because it is
# fuzzy-matched against the dataset titles in a later step.
movie_name = input("Enter movie")

Enter movie mission impossible 3


In [19]:
# Collect every title, in row order, as a plain Python list for fuzzy matching.
list_of_all_titles = movies_data['title'].tolist()

In [21]:
# Display the titles.
# NOTE(review): this prints the whole list (one entry per movie); consider
# slicing, e.g. the first ten, to keep the notebook output short.
list_of_all_titles

['Avatar',
 "Pirates of the Caribbean: At World's End",
 'Spectre',
 'The Dark Knight Rises',
 'John Carter',
 'Spider-Man 3',
 'Tangled',
 'Avengers: Age of Ultron',
 'Harry Potter and the Half-Blood Prince',
 'Batman v Superman: Dawn of Justice',
 'Superman Returns',
 'Quantum of Solace',
 "Pirates of the Caribbean: Dead Man's Chest",
 'The Lone Ranger',
 'Man of Steel',
 'The Chronicles of Narnia: Prince Caspian',
 'The Avengers',
 'Pirates of the Caribbean: On Stranger Tides',
 'Men in Black 3',
 'The Hobbit: The Battle of the Five Armies',
 'The Amazing Spider-Man',
 'Robin Hood',
 'The Hobbit: The Desolation of Smaug',
 'The Golden Compass',
 'King Kong',
 'Titanic',
 'Captain America: Civil War',
 'Battleship',
 'Jurassic World',
 'Skyfall',
 'Spider-Man 2',
 'Iron Man 3',
 'Alice in Wonderland',
 'X-Men: The Last Stand',
 'Monsters University',
 'Transformers: Revenge of the Fallen',
 'Transformers: Age of Extinction',
 'Oz: The Great and Powerful',
 'The Amazing Spider-Man 2',

In [23]:
# Titles that best resemble the user's input, best match first. With difflib's
# defaults at most three are returned, and the list is empty when nothing is
# similar enough.
find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)

In [24]:
# Inspect the candidate titles.
find_close_match 

['Mission: Impossible', 'Mission: Impossible II', 'Mission: Impossible III']

In [25]:
# difflib returns an empty list when no title is similar enough to the input;
# fail with an explicit message instead of a bare IndexError on [0].
if not find_close_match:
  raise ValueError(f'No movie title close to {movie_name!r} was found')

# The first candidate is the best-scoring match.
close_match = find_close_match[0]

In [26]:
# The title that will be used as the query movie.
close_match

'Mission: Impossible'

In [27]:
#find the row position of the movie with the matched title
# The similarity matrix is addressed by row position, so take the position from
# the title list (built in row order) rather than trusting the CSV's 'index'
# column to coincide with it. For duplicate titles the first occurrence is used.
index_of_the_movie = list_of_all_titles.index(close_match)

In [28]:
# Row of the query movie in the similarity matrix.
index_of_the_movie

425

In [29]:
# Pair each movie's row position with its similarity to the query movie.
scores_for_movie = similarity[index_of_the_movie]
similarity_score = [(position, score) for position, score in enumerate(scores_for_movie)]

In [30]:
# Inspect the (row position, score) pairs, still in dataset order.
similarity_score 

[(0, 0.02491863046409929),
 (1, 0.04138030876091872),
 (2, 0.060395502911815765),
 (3, 0.1011114523345565),
 (4, 0.01904567342887477),
 (5, 0.028026713309068804),
 (6, 0.012536085908340253),
 (7, 0.020274576860993876),
 (8, 0.035350401610427976),
 (9, 0.059845587468993725),
 (10, 0.0308457261245859),
 (11, 0.023353060027467495),
 (12, 0.020326120097369497),
 (13, 0.024986146532727196),
 (14, 0.0663456720178753),
 (15, 0.01172911962244588),
 (16, 0.019100501175241126),
 (17, 0.018918629383725547),
 (18, 0.01517387992939592),
 (19, 0.026716039071010316),
 (20, 0.025467277106259078),
 (21, 0.016100122782576363),
 (22, 0.016063070204021136),
 (23, 0.045689428230667566),
 (24, 0.029265311453268442),
 (25, 0.009308896123788837),
 (26, 0.020932501276795155),
 (27, 0.029274692272414093),
 (28, 0.032769610787032256),
 (29, 0.06907078071526188),
 (30, 0.021078860819114555),
 (31, 0.07524107873185873),
 (32, 0.011180179072936145),
 (33, 0.026882211463695998),
 (34, 0.0025756663482182627),
 (35, 0

In [31]:
# Rank the (row position, score) pairs from most to least similar. The sort is
# stable, so movies with equal scores keep their dataset order.
sorted_similar_movies = list(similarity_score)
sorted_similar_movies.sort(key=lambda pair: pair[1], reverse=True)
sorted_similar_movies

[(425, 1.0000000000000004),
 (213, 0.2935485862023165),
 (677, 0.21678794995753542),
 (1797, 0.19141946162573342),
 (433, 0.1834962218062578),
 (134, 0.18321670625745726),
 (4339, 0.18318950802882952),
 (1092, 0.18228602277115513),
 (142, 0.1804369906687188),
 (737, 0.1799492315039069),
 (201, 0.17952858273241748),
 (739, 0.16686259794833241),
 (1586, 0.15776745704421866),
 (3343, 0.15764125967096923),
 (2675, 0.15576134694945323),
 (840, 0.14430799907161085),
 (245, 0.14071882162924298),
 (3273, 0.13387409429297042),
 (3377, 0.13194491007238449),
 (1848, 0.12859009792240894),
 (2972, 0.1252805146288447),
 (2361, 0.12409334679181416),
 (773, 0.12383004321744863),
 (139, 0.12274825866658318),
 (1439, 0.12245114713939764),
 (164, 0.12135461365663769),
 (2371, 0.12110357726844344),
 (2360, 0.11997096341323749),
 (437, 0.1194926944063795),
 (291, 0.11818841902487537),
 (1417, 0.11814950128826895),
 (263, 0.1169965909599828),
 (227, 0.11637610870237561),
 (1159, 0.1162962791506895),
 (601, 

In [35]:
print('Movies suggested for you : \n')

# Only the 20 best-ranked entries are printed, so slice them off up front
# instead of walking the whole ranking and scanning the frame once per entry.
# Titles are read by row position because that is how the similarity matrix is
# indexed. The query movie itself ranks first, since it matches itself exactly.
for i, (index, _score) in enumerate(sorted_similar_movies[:20], start=1):
  title_from_index = movies_data['title'].iloc[index]
  print(i, '.',title_from_index)

Movies suggested for you : 

1 . Mission: Impossible
2 . Mission: Impossible II
3 . Clear and Present Danger
4 . Agent Cody Banks 2: Destination London
5 . RED 2
6 . Mission: Impossible - Rogue Nation
7 . Dr. No
8 . The Ghost Writer
9 . Flushed Away
10 . Jack Ryan: Shadow Recruit
11 . The Da Vinci Code
12 . London Has Fallen
13 . A View to a Kill
14 . Live and Let Die
15 . The Spy Who Loved Me
16 . Ronin
17 . Tomorrow Never Dies
18 . 8 Women
19 . A Dog Of Flanders
20 . Agent Cody Banks


In [39]:
#Movie Recommendation System Final
# End-to-end run: read a title, fuzzy-match it against the dataset, rank every
# movie by cosine similarity to the match and print the 29 best-ranked titles.
# Requires movies_data and similarity from the earlier cells.
movie_name = input(' Enter your favourite movie name : ')

list_of_all_titles = movies_data['title'].tolist()

find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)

# difflib returns an empty list when no title is similar enough to the input;
# fail with an explicit message instead of a bare IndexError on [0].
if not find_close_match:
  raise ValueError(f'No movie title close to {movie_name!r} was found')

close_match = find_close_match[0]

# The similarity matrix is addressed by row position, so take the position from
# the title list (built in row order) rather than from the CSV's 'index' column.
index_of_the_movie = list_of_all_titles.index(close_match)

similarity_score = list(enumerate(similarity[index_of_the_movie]))

sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True)

print('Movies suggested for you : \n')

# Slice the top of the ranking instead of iterating over every movie and
# filtering the frame once per entry. The query movie itself ranks first.
for i, (index, _score) in enumerate(sorted_similar_movies[:29], start=1):
  title_from_index = list_of_all_titles[index]
  print(i, '.',title_from_index)

 Enter your favourite movie name :  avengers


Movies suggested for you : 

1 . The Avengers
2 . Avengers: Age of Ultron
3 . Captain America: The Winter Soldier
4 . Captain America: Civil War
5 . Iron Man 2
6 . Thor: The Dark World
7 . X-Men
8 . The Incredible Hulk
9 . X-Men: Apocalypse
10 . Thor
11 . Ant-Man
12 . X2
13 . X-Men: The Last Stand
14 . Deadpool
15 . X-Men: Days of Future Past
16 . Captain America: The First Avenger
17 . The Amazing Spider-Man 2
18 . Iron Man
19 . The Image Revolution
20 . Iron Man 3
21 . Man of Steel
22 . X-Men: First Class
23 . Superman II
24 . The Spirit
25 . Guardians of the Galaxy
26 . Batman v Superman: Dawn of Justice
27 . 2 Guns
28 . Teenage Mutant Ninja Turtles: Out of the Shadows
29 . The Helix... Loaded
