In [1]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Read the movie metadata from 'movies.csv' (path is relative to the
# working directory) into a DataFrame; the bare name on the last line
# makes the notebook render it.
df = pd.read_csv(filepath_or_buffer='movies.csv')
df

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,...,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.312950,...,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,...,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4798,4798,220000,Action Crime Thriller,,9367,united states\u2013mexico barrier legs arms pa...,es,El Mariachi,El Mariachi just wants to play his guitar and ...,14.269792,...,81.0,"[{""iso_639_1"": ""es"", ""name"": ""Espa\u00f1ol""}]",Released,"He didn't come looking for trouble, but troubl...",El Mariachi,6.6,238,Carlos Gallardo Jaime de Hoyos Peter Marquardt...,"[{'name': 'Robert Rodriguez', 'gender': 0, 'de...",Robert Rodriguez
4799,4799,9000,Comedy Romance,,72766,,en,Newlyweds,A newlywed couple's honeymoon is upended by th...,0.642552,...,85.0,[],Released,A newlywed couple's honeymoon is upended by th...,Newlyweds,5.9,5,Edward Burns Kerry Bish\u00e9 Marsha Dietlein ...,"[{'name': 'Edward Burns', 'gender': 2, 'depart...",Edward Burns
4800,4800,0,Comedy Drama Romance TV Movie,http://www.hallmarkchannel.com/signedsealeddel...,231617,date love at first sight narration investigati...,en,"Signed, Sealed, Delivered","""Signed, Sealed, Delivered"" introduces a dedic...",1.444476,...,120.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,,"Signed, Sealed, Delivered",7.0,6,Eric Mabius Kristin Booth Crystal Lowe Geoff G...,"[{'name': 'Carla Hetland', 'gender': 0, 'depar...",Scott Smith
4801,4801,0,,http://shanghaicalling.com/,126186,,en,Shanghai Calling,When ambitious New York attorney Sam is sent t...,0.857008,...,98.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,A New Yorker in Shanghai,Shanghai Calling,5.7,7,Daniel Henney Eliza Coupe Bill Paxton Alan Ruc...,"[{'name': 'Daniel Hsia', 'gender': 2, 'departm...",Daniel Hsia


# Selecting the relevant features for recommendation

In [3]:
# Text columns that are later merged into one document per movie.
selected_features = [
    'genres',
    'keywords',
    'overview',
    'tagline',
    'cast',
    'director',
]
selected_features

['genres', 'keywords', 'overview', 'tagline', 'cast', 'director']

# Replacing null values

In [4]:
# Fill missing entries in every selected text column with a single space,
# so that concatenating the columns later never yields NaN for a row.
df[selected_features] = df[selected_features].fillna(" ")

# Combining all features

In [5]:
# Build one space-separated text document per movie by appending each
# descriptive column, left to right, to the genres column.
combined_features = df['genres']
for column_name in ['keywords', 'overview', 'tagline', 'cast', 'director']:
    combined_features = combined_features + ' ' + df[column_name]
combined_features

0       Action Adventure Fantasy Science Fiction cultu...
1       Adventure Fantasy Action ocean drug abuse exot...
2       Action Adventure Crime spy based on novel secr...
3       Action Crime Drama Thriller dc comics crime fi...
4       Action Adventure Science Fiction based on nove...
                              ...                        
4798    Action Crime Thriller united states\u2013mexic...
4799    Comedy Romance   A newlywed couple's honeymoon...
4800    Comedy Drama Romance TV Movie date love at fir...
4801        When ambitious New York attorney Sam is se...
4802    Documentary obsession camcorder crush dream gi...
Length: 4803, dtype: object

# Converting the text data to feature vectors

In [6]:
# Learn a TF-IDF vocabulary from the combined text and encode every movie
# as one row of a sparse weight matrix (rows = movies, columns = terms).
vectorizer = TfidfVectorizer()
feature_vectors = vectorizer.fit_transform(raw_documents=combined_features)
feature_vectors

<4803x30592 sparse matrix of type '<class 'numpy.float64'>'
	with 307355 stored elements in Compressed Sparse Row format>

In [7]:
# Pairwise cosine similarity between all movie vectors. With Y=None the
# rows of X are compared against each other, giving a square dense array
# where entry [i, j] scores movie i against movie j.
similarity = cosine_similarity(X=feature_vectors, Y=None, dense_output=True)
similarity

array([[1.        , 0.05083168, 0.0332947 , ..., 0.02749812, 0.0304889 ,
        0.0072518 ],
       [0.05083168, 1.        , 0.04356836, ..., 0.05077045, 0.03100979,
        0.01521198],
       [0.0332947 , 0.04356836, 1.        , ..., 0.02646984, 0.04751623,
        0.01372603],
       ...,
       [0.02749812, 0.05077045, 0.02646984, ..., 1.        , 0.03481447,
        0.03546821],
       [0.0304889 , 0.03100979, 0.04751623, ..., 0.03481447, 1.        ,
        0.03098945],
       [0.0072518 , 0.01521198, 0.01372603, ..., 0.03546821, 0.03098945,
        1.        ]])

# Get movie name from user

In [8]:
# Ask the user for the title to base the recommendations on.
name_prompt = ' Enter your favourite movie name : '
movie_name = input(name_prompt)

 Enter your favourite movie name : iron man


In [9]:
# Plain Python list of every title, in row order, for fuzzy matching.
list_of_all_titles = df.loc[:, 'title'].to_list()
list_of_all_titles

['Avatar',
 "Pirates of the Caribbean: At World's End",
 'Spectre',
 'The Dark Knight Rises',
 'John Carter',
 'Spider-Man 3',
 'Tangled',
 'Avengers: Age of Ultron',
 'Harry Potter and the Half-Blood Prince',
 'Batman v Superman: Dawn of Justice',
 'Superman Returns',
 'Quantum of Solace',
 "Pirates of the Caribbean: Dead Man's Chest",
 'The Lone Ranger',
 'Man of Steel',
 'The Chronicles of Narnia: Prince Caspian',
 'The Avengers',
 'Pirates of the Caribbean: On Stranger Tides',
 'Men in Black 3',
 'The Hobbit: The Battle of the Five Armies',
 'The Amazing Spider-Man',
 'Robin Hood',
 'The Hobbit: The Desolation of Smaug',
 'The Golden Compass',
 'King Kong',
 'Titanic',
 'Captain America: Civil War',
 'Battleship',
 'Jurassic World',
 'Skyfall',
 'Spider-Man 2',
 'Iron Man 3',
 'Alice in Wonderland',
 'X-Men: The Last Stand',
 'Monsters University',
 'Transformers: Revenge of the Fallen',
 'Transformers: Age of Extinction',
 'Oz: The Great and Powerful',
 'The Amazing Spider-Man 2',

# Finding the closest match for the movie name

In [10]:
# Fuzzy-match the typed name against the known titles. n=3 and cutoff=0.6
# are difflib's defaults, spelled out here; results come back best first.
find_close_match = difflib.get_close_matches(
    word=movie_name,
    possibilities=list_of_all_titles,
    n=3,
    cutoff=0.6,
)
find_close_match

['Iron Man', 'Iron Man 3', 'Iron Man 2']

In [11]:
# difflib.get_close_matches returns an empty list when no title clears its
# similarity cutoff. Fail with a clear message instead of a bare IndexError,
# so the cells below never run against a stale index.
if not find_close_match:
    raise ValueError(
        f"No title in the dataset is close to {movie_name!r}; "
        "try a different spelling."
    )

# The best match is the first element (matches are ordered best first).
close_match = find_close_match[0]

# Read the value of the dataset's 'index' column for the first row with
# that title; it is used below to select the row of the similarity matrix.
index_of_the_movie = df[df.title == close_match]['index'].values[0]
index_of_the_movie

68

In [12]:
# Pair each row position with its similarity to the chosen movie, so the
# position survives the sort by score that follows.
similarity_score = [
    (position, score)
    for position, score in enumerate(similarity[index_of_the_movie])
]
similarity_score

[(0, 0.05017469339266746),
 (1, 0.03650601760111055),
 (2, 0.011028940457748972),
 (3, 0.006566052540268534),
 (4, 0.026260064582796727),
 (5, 0.014352343583526092),
 (6, 0.03283993264677229),
 (7, 0.19343464286690049),
 (8, 0.010447641591762173),
 (9, 0.03661745849142617),
 (10, 0.03856206332307925),
 (11, 0.013140335946520334),
 (12, 0.010480204139154865),
 (13, 0.010519252119405034),
 (14, 0.058240542855474524),
 (15, 0.028506070177881645),
 (16, 0.13208839109747222),
 (17, 0.015783602134002372),
 (18, 0.02078306728232427),
 (19, 0.042486448120754944),
 (20, 0.036752005155219786),
 (21, 0.024462379417016164),
 (22, 0.009211246725242383),
 (23, 0.017688499354673565),
 (24, 0.017920651785080486),
 (25, 0.003516675934857519),
 (26, 0.11563787911286948),
 (27, 0.03304503555209013),
 (28, 0.03726807159069121),
 (29, 0.00955791287735387),
 (30, 0.0796908302150445),
 (31, 0.29052114405111434),
 (32, 0.02575250044213904),
 (33, 0.07770036840942475),
 (34, 0.005794060102999554),
 (35, 0.0209

In [13]:
# Order the (position, score) pairs from most to least similar. Sorting a
# copy leaves similarity_score in its original row order.
sorted_similar_movies = list(similarity_score)
sorted_similar_movies.sort(key=lambda pair: pair[1], reverse=True)
sorted_similar_movies

[(68, 1.0000000000000002),
 (79, 0.32928784708836273),
 (31, 0.29052114405111434),
 (7, 0.19343464286690049),
 (16, 0.13208839109747222),
 (511, 0.11735356375367764),
 (4401, 0.11644899793001537),
 (26, 0.11563787911286948),
 (64, 0.11260372706734421),
 (182, 0.10817936918053872),
 (3623, 0.1042757991040167),
 (94, 0.10008582067970911),
 (46, 0.08808497545952933),
 (4033, 0.08653671400103878),
 (783, 0.08640309672854925),
 (174, 0.08596763747178217),
 (618, 0.08422372953874689),
 (85, 0.08317925570431418),
 (3133, 0.0800340820705973),
 (101, 0.07997032837045652),
 (1740, 0.07985500618953217),
 (30, 0.0796908302150445),
 (3466, 0.07863852678986306),
 (33, 0.07770036840942475),
 (203, 0.07717739012595404),
 (1177, 0.07676994814946043),
 (420, 0.07660248386797325),
 (131, 0.07526258364935165),
 (1406, 0.07520922477471761),
 (318, 0.07470160419701297),
 (788, 0.07388756055635985),
 (2229, 0.07375196657625921),
 (122, 0.07319454323849187),
 (1135, 0.07304073502421526),
 (2235, 0.07248187289

# Names of similar movies

In [14]:
print('Movies suggested for you : \n')

# Only the 29 best-scoring entries are shown, so slice them off up front
# instead of walking every scored row and scanning the DataFrame each time.
# The first entry is normally the chosen movie itself, because its
# similarity to itself is the maximum.
for i, (index, _score) in enumerate(sorted_similar_movies[:29], start=1):
    # Positions come from enumerating a similarity row, so they are
    # positional row numbers; .iloc looks the title up by position.
    title_from_index = df['title'].iloc[index]
    print(i, '.', title_from_index)

Movies suggested for you : 

1 . Iron Man
2 . Iron Man 2
3 . Iron Man 3
4 . Avengers: Age of Ultron
5 . The Avengers
6 . X-Men
7 . The Helix... Loaded
8 . Captain America: Civil War
9 . X-Men: Apocalypse
10 . Ant-Man
11 . Made
12 . Guardians of the Galaxy
13 . X-Men: Days of Future Past
14 . Super
15 . Mortdecai
16 . The Incredible Hulk
17 . Mystery Men
18 . Captain America: The Winter Soldier
19 . The Devil's Tomb
20 . X-Men: First Class
21 . Kick-Ass 2
22 . Spider-Man 2
23 . Sliding Doors
24 . X-Men: The Last Stand
25 . X2
26 . Sin City
27 . Hellboy II: The Golden Army
28 . G-Force
29 . The Nativity Story


# Movie Recommendation System

In [15]:
# End-to-end recommendation: ask for a title, fuzzy-match it against the
# dataset, then list the titles whose text features are most similar.
movie_name = input(' Enter your favourite movie name : ')

list_of_all_titles = df['title'].tolist()

# Returns up to three candidate titles, best first, or [] when nothing is
# close enough to the typed name.
find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)

if not find_close_match:
    # Without this guard an unrecognised name dies with a bare IndexError.
    print('No movie title close to', repr(movie_name), 'was found in the dataset.')
else:
    close_match = find_close_match[0]

    # Value of the 'index' column for the first row with the matched title;
    # it selects the row of the similarity matrix.
    index_of_the_movie = df[df.title == close_match]['index'].values[0]

    # (row position, similarity to the matched movie) for every movie.
    similarity_score = list(enumerate(similarity[index_of_the_movie]))

    sorted_similar_movies = sorted(similarity_score, key=lambda x: x[1], reverse=True)

    print('Movies suggested for you : \n')

    # Only the top 29 entries are printed, so slice them off instead of
    # iterating over (and filtering the DataFrame for) every scored row.
    for i, (index, _score) in enumerate(sorted_similar_movies[:29], start=1):
        # Similarity rows are positional, so look the title up with .iloc.
        title_from_index = df['title'].iloc[index]
        print(i, '.', title_from_index)

 Enter your favourite movie name : jurassic park
Movies suggested for you : 

1 . Jurassic Park
2 . The Lost World: Jurassic Park
3 . Jurassic World
4 . Jurassic Park III
5 . The Land Before Time
6 . Walking With Dinosaurs
7 . Species
8 . Sea Rex 3D: Journey to a Prehistoric World
9 . The Good Dinosaur
10 . Gattaca
11 . The Nut Job
12 . Rise of the Entrepreneur: The Search for a Better Way
13 . Close Encounters of the Third Kind
14 . E.T. the Extra-Terrestrial
15 . Splice
16 . Adventureland
17 . Swimming Pool
18 . National Lampoon's Vacation
19 . Vacation
20 . The Island of Dr. Moreau
21 . The Curse of the Were-Rabbit
22 . A.I. Artificial Intelligence
23 . Quest for Fire
24 . Sparkler
25 . Yogi Bear
26 . G.I. Joe: Retaliation
27 . Moon
28 . The Great Escape
29 . Gremlins 2: The New Batch
