<a href="https://colab.research.google.com/github/geetanjali317/MoviesRecommenderSystem/blob/main/MovieRecommender.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Setup

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Path of the movies dataset. NOTE(review): /content looks like the Colab
# working directory this notebook targets — adjust when running elsewhere.
MOVIES_CSV_PATH = '/content/movies.csv'
movies_data = pd.read_csv(MOVIES_CSV_PATH)

In [None]:
# Peek at the first five rows to see what the columns contain.
movies_data.head(n=5)

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,...,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,...,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,...,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton


In [None]:
# Dimensions of the loaded dataset as (rows, columns).
movies_data.shape

(4803, 24)

In [None]:
# Per-column dtype and non-null count, to spot columns with missing values.
movies_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4803 entries, 0 to 4802
Data columns (total 24 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   index                 4803 non-null   int64  
 1   budget                4803 non-null   int64  
 2   genres                4775 non-null   object 
 3   homepage              1712 non-null   object 
 4   id                    4803 non-null   int64  
 5   keywords              4391 non-null   object 
 6   original_language     4803 non-null   object 
 7   original_title        4803 non-null   object 
 8   overview              4800 non-null   object 
 9   popularity            4803 non-null   float64
 10  production_companies  4803 non-null   object 
 11  production_countries  4803 non-null   object 
 12  release_date          4802 non-null   object 
 13  revenue               4803 non-null   int64  
 14  runtime               4801 non-null   float64
 15  spoken_languages     

In [None]:
# Number of missing values in each column (summing the null mask down the rows).
movies_data.isnull().sum(axis=0)

Unnamed: 0,0
index,0
budget,0
genres,28
homepage,3091
id,0
keywords,412
original_language,0
original_title,0
overview,3
popularity,0


Feature Selection

In [None]:
# Text columns whose contents are combined to describe each movie.
selected_feature = [
  'genres',
  'keywords',
  'tagline',
  'cast',
  'director',
]
print(selected_feature)

['genres', 'keywords', 'tagline', 'cast', 'director']


In [None]:
# Replace missing values in the selected text columns with empty strings so
# they can be concatenated as text later on.
movies_data[selected_feature] = movies_data[selected_feature].fillna('')

In [None]:
# Build one space-separated text per movie from the descriptive columns,
# concatenating them left to right in the order listed here.
feature_columns = ['genres', 'keywords', 'tagline', 'cast', 'director']
combined_features = movies_data[feature_columns[0]]
for column_name in feature_columns[1:]:
  combined_features = combined_features + ' ' + movies_data[column_name]

In [None]:
# Preview the combined text built for each movie (pandas elides the middle rows).
print(combined_features)

0       Action Adventure Fantasy Science Fiction cultu...
1       Adventure Fantasy Action ocean drug abuse exot...
2       Action Adventure Crime spy based on novel secr...
3       Action Crime Drama Thriller dc comics crime fi...
4       Action Adventure Science Fiction based on nove...
                              ...                        
4798    Action Crime Thriller united states\u2013mexic...
4799    Comedy Romance  A newlywed couple's honeymoon ...
4800    Comedy Drama Romance TV Movie date love at fir...
4801      A New Yorker in Shanghai Daniel Henney Eliza...
4802    Documentary obsession camcorder crush dream gi...
Length: 4803, dtype: object


Vectorization

In [None]:
# TF-IDF vectorizer with default settings, used below to turn each movie's
# combined text into a numeric vector.
vectorizer = TfidfVectorizer()

In [None]:
# Fit the vectorizer on the combined text of all movies.
vectorizer.fit(combined_features)

In [None]:
# Convert every movie's combined text into its TF-IDF vector (one row per movie).
feature_vectors = vectorizer.transform(combined_features)

Cosine Similarity

In [None]:
# Pairwise cosine similarity between all movie vectors: row i holds the
# similarity of movie i to every movie in the dataset.
similarity = cosine_similarity(feature_vectors)

In [None]:
# Preview the similarity matrix; the diagonal is 1 because each movie is
# compared with itself.
print(similarity)

[[1.         0.07219487 0.037733   ... 0.         0.         0.        ]
 [0.07219487 1.         0.03281499 ... 0.03575545 0.         0.        ]
 [0.037733   0.03281499 1.         ... 0.         0.05389661 0.        ]
 ...
 [0.         0.03575545 0.         ... 1.         0.         0.02651502]
 [0.         0.         0.05389661 ... 0.         1.         0.        ]
 [0.         0.         0.         ... 0.02651502 0.         1.        ]]


In [None]:
# Shape check: expect a square matrix with one row and one column per movie.
print(similarity.shape)

(4803, 4803)


In [None]:
# Ask for the title to base the recommendations on. Surrounding whitespace is
# dropped so stray spaces do not skew the fuzzy title match done later.
movie_names = input("Enter favorite movie name : ").strip()

Enter Favority movie name : InsideOut


In [None]:
# Every title in the dataset, in row order, as a plain Python list
# (the form difflib needs for fuzzy matching).
list_of_all_titles = movies_data.title.tolist()
print(list_of_all_titles)

['Avatar', "Pirates of the Caribbean: At World's End", 'Spectre', 'The Dark Knight Rises', 'John Carter', 'Spider-Man 3', 'Tangled', 'Avengers: Age of Ultron', 'Harry Potter and the Half-Blood Prince', 'Batman v Superman: Dawn of Justice', 'Superman Returns', 'Quantum of Solace', "Pirates of the Caribbean: Dead Man's Chest", 'The Lone Ranger', 'Man of Steel', 'The Chronicles of Narnia: Prince Caspian', 'The Avengers', 'Pirates of the Caribbean: On Stranger Tides', 'Men in Black 3', 'The Hobbit: The Battle of the Five Armies', 'The Amazing Spider-Man', 'Robin Hood', 'The Hobbit: The Desolation of Smaug', 'The Golden Compass', 'King Kong', 'Titanic', 'Captain America: Civil War', 'Battleship', 'Jurassic World', 'Skyfall', 'Spider-Man 2', 'Iron Man 3', 'Alice in Wonderland', 'X-Men: The Last Stand', 'Monsters University', 'Transformers: Revenge of the Fallen', 'Transformers: Age of Extinction', 'Oz: The Great and Powerful', 'The Amazing Spider-Man 2', 'TRON: Legacy', 'Cars 2', 'Green Lant

In [None]:
# Fuzzy-match the typed name against all titles. The arguments spell out
# difflib's defaults: at most 3 candidates, similarity cutoff 0.6, best first.
find_close_match = difflib.get_close_matches(
  movie_names,
  list_of_all_titles,
  n=3,
  cutoff=0.6,
)
print(find_close_match)

['Inside Out', 'Insidious', 'Inside Man']


In [None]:
# Map the best fuzzy match back to its row position in movies_data.
# Rows of `similarity` follow the row order of movies_data, so the positional
# row number is the correct lookup key rather than a value stored in a CSV column.
if not find_close_match:
  # Fail with a clear message instead of an opaque IndexError on [0].
  raise ValueError("No close match found for the entered movie name; try a different spelling.")
title_matches = (movies_data['title'] == find_close_match[0]).to_numpy()
# First row whose title equals the best match.
index_of_movie = np.flatnonzero(title_matches)[0]

In [None]:
# Row number of the matched movie, used to pick its row of the similarity matrix.
print(index_of_movie)

77


In [None]:
# Pair every movie's row position with its similarity to the chosen movie,
# giving a list of (position, score) tuples.
similarity_score = [
  (position, score)
  for position, score in enumerate(similarity[index_of_movie])
]
print(similarity_score)

[(0, 0.004921922497109893), (1, 0.012887592319338429), (2, 0.0), (3, 0.00895868855790289), (4, 0.0), (5, 0.005098726363042803), (6, 0.05227796461011297), (7, 0.0), (8, 0.01911316501774308), (9, 0.0272106127835911), (10, 0.0055360296899486265), (11, 0.0), (12, 0.02172531673449881), (13, 0.004537359230320509), (14, 0.032302681287611684), (15, 0.01766770551518161), (16, 0.0), (17, 0.0), (18, 0.029190794410405587), (19, 0.02290822865434564), (20, 0.0049014943940469515), (21, 0.0), (22, 0.01941296824110514), (23, 0.008512044877101577), (24, 0.03520734481721748), (25, 0.003122675107328781), (26, 0.0), (27, 0.0044836265437937835), (28, 0.004922492833662004), (29, 0.018233571445983535), (30, 0.0), (31, 0.033627664114617956), (32, 0.017120482464798942), (33, 0.0), (34, 0.11911507338114274), (35, 0.0), (36, 0.0), (37, 0.019457337896172718), (38, 0.0), (39, 0.004635284936961563), (40, 0.10346892973255421), (41, 0.0), (42, 0.0889434446354993), (43, 0.01000613471769234), (44, 0.0), (45, 0.002813499

In [None]:
# Sanity check: there should be one similarity score per movie in the dataset.
len(similarity_score)

4803

In [None]:
# Order the (position, score) pairs from most to least similar. The sort runs
# on a copy so similarity_score keeps its original order.
sorted_similar_movies = list(similarity_score)
sorted_similar_movies.sort(key=lambda pair: pair[1], reverse=True)
print(sorted_similar_movies)

[(77, 1.0000000000000002), (66, 0.19493403996577952), (3670, 0.1902059436232738), (1656, 0.17606210101328207), (2199, 0.17241754545775412), (837, 0.16409613016731006), (231, 0.15725185637468309), (3963, 0.15175632267384637), (1519, 0.1438293118661605), (347, 0.14311547750374226), (2049, 0.14288305931815304), (258, 0.14053427385747153), (496, 0.1369887208335055), (3038, 0.13109899783287735), (1587, 0.1289594189189277), (234, 0.12633220512068327), (2330, 0.12395423716971292), (34, 0.11911507338114274), (525, 0.1187173786736273), (1695, 0.11797532687145147), (550, 0.11742225815184468), (4641, 0.1154555801639743), (503, 0.11477006993285893), (175, 0.11401690273649406), (1062, 0.11281508881418753), (1845, 0.11227289896797832), (960, 0.10950296650174418), (1371, 0.10944618736392209), (3196, 0.10926813324070908), (268, 0.1091069589393581), (221, 0.10856071606226231), (3403, 0.10786248169316043), (2292, 0.10702357538824706), (1682, 0.10631569344049736), (40, 0.10346892973255421), (1574, 0.1013

In [None]:
print("Movies Suggested:")

# Only the 30 best-scoring entries are shown, so slice first instead of
# looking up a title for every row and then discarding most of them.
# Entry 0 is the chosen movie itself (its similarity with itself is 1.0).
for i, (index, _) in enumerate(sorted_similar_movies[:30]):
  # `index` is a row position in `similarity`, hence positional .iloc access.
  title_from_index = movies_data['title'].iloc[index]
  print(i, title_from_index)

Movies Suggested:
0 Inside Out
1 Up
2 Running Forever
3 Hoodwinked Too! Hood VS. Evil
4 Envy
5 Free Birds
6 Monsters, Inc.
7 They Came Together
8 Bogus
9 Cloudy with a Chance of Meatballs
10 Dudley Do-Right
11 The Smurfs 2
12 Cloudy with a Chance of Meatballs 2
13 Hey Arnold! The Movie
14 The Curse of the Were-Rabbit
15 The Croods
16 Mean Girls
17 Monsters University
18 Shark Tale
19 Aladdin
20 The Angry Birds Movie
21 I Married a Strange Person!
22 The Adventures of Rocky & Bullwinkle
23 The BFG
24 A Bug's Life
25 Hide and Seek
26 The Adventures of Sharkboy and Lavagirl
27 Trainwreck
28 Opal Dream
29 Stuart Little
