In [None]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Load the movie metadata table from the CSV file (path is relative).
movies_data = pd.read_csv('movies.csv')

# Display just the first row as a quick look at the available columns.
movies_data.head(n=1)

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron


In [None]:
# Text columns that are later merged into one string per movie.
selected_features = [
  'genres',
  'keywords',
  'tagline',
  'cast',
  'overview',
  'director',
]

print(selected_features)

['genres', 'keywords', 'tagline', 'cast', 'overview', 'director']


In [None]:
# Replace missing values in the selected text columns with empty strings so
# that the columns can be concatenated afterwards.
movies_data[selected_features] = movies_data[selected_features].fillna('')

In [None]:
# Build one text string per movie from the selected columns.
# Every column is separated by a single space; previously the separator
# between 'overview' and 'director' was missing, which glued the last word of
# the overview to the director's first name and produced a bogus token.
combined_features = (
  movies_data['genres'] + " "
  + movies_data['keywords'] + " "
  + movies_data['tagline'] + " "
  + movies_data['cast'] + " "
  + movies_data['overview'] + " "
  + movies_data['director']
)

print(combined_features)

0       Action Adventure Fantasy Science Fiction cultu...
1       Adventure Fantasy Action ocean drug abuse exot...
2       Action Adventure Crime spy based on novel secr...
3       Action Crime Drama Thriller dc comics crime fi...
4       Action Adventure Science Fiction based on nove...
                              ...                        
4798    Action Crime Thriller united states\u2013mexic...
4799    Comedy Romance  A newlywed couple's honeymoon ...
4800    Comedy Drama Romance TV Movie date love at fir...
4801      A New Yorker in Shanghai Daniel Henney Eliza...
4802    Documentary obsession camcorder crush dream gi...
Length: 4803, dtype: object


In [None]:
# Create a TF-IDF vectorizer with its default settings; it is used to
# convert the combined text of each movie into a numeric feature vector.

vectorizer = TfidfVectorizer()

In [None]:
# Fit the vectorizer on the combined text and transform it in one step,
# giving one row of TF-IDF weights per movie.
feature_vectors = vectorizer.fit_transform(combined_features)

# Printing the result lists its stored (row, column) entries with their weights.
print(feature_vectors)

  (0, 4303)	0.13211697909238707
  (0, 14229)	0.08428210366497216
  (0, 5266)	0.1650508284879406
  (0, 1017)	0.12284517243572439
  (0, 1249)	0.052701399339630424
  (0, 21587)	0.1664830865066961
  (0, 1287)	0.031948452721142516
  (0, 19708)	0.1611881679897429
  (0, 10409)	0.13846782398366933
  (0, 2987)	0.0997305214173566
  (0, 27593)	0.1456764731997764
  (0, 2697)	0.09804283107901834
  (0, 4143)	0.06195069192235284
  (0, 18082)	0.10938225477397243
  (0, 28676)	0.15489080805700953
  (0, 19599)	0.04907792260417852
  (0, 18306)	0.14768215884090247
  (0, 27483)	0.03101167039938542
  (0, 7861)	0.1797794066196038
  (0, 14071)	0.04169642596274392
  (0, 17075)	0.14322987834912998
  (0, 20163)	0.1925795185659098
  (0, 4785)	0.1250020036061809
  (0, 241)	0.1971480762666419
  (0, 13522)	0.03647447113285595
  :	:
  (4802, 27231)	0.059398713414339374
  (4802, 19300)	0.0613472706578673
  (4802, 11987)	0.07828909250866234
  (4802, 29763)	0.07735276490070489
  (4802, 9385)	0.07104072336495271
  (4802, 

Cosine Similarity

In [None]:
# Pairwise cosine similarity between all movie vectors: row r holds the
# similarity of movie r to every movie (the printed diagonal is 1.0).
similarity = cosine_similarity(feature_vectors)

print(similarity)

[[1.         0.05082576 0.03338469 ... 0.02749301 0.03061594 0.00725077]
 [0.05082576 1.         0.04356297 ... 0.05076605 0.03098525 0.01521132]
 [0.03338469 0.04356297 1.         ... 0.02648674 0.04768854 0.01372397]
 ...
 [0.02749301 0.05076605 0.02648674 ... 1.         0.03478353 0.03549349]
 [0.03061594 0.03098525 0.04768854 ... 0.03478353 1.         0.03096325]
 [0.00725077 0.01521132 0.01372397 ... 0.03549349 0.03096325 1.        ]]


In [None]:
# Ask the user for the movie title the recommendations should be based on.
movie_name = input("Enter movie name: ")

Enter movie name: ironman


In [None]:
# Collect every title of the dataset in a plain Python list, in row order.
movie_list = list(movies_data['title'])

print(movie_list)

['Avatar', "Pirates of the Caribbean: At World's End", 'Spectre', 'The Dark Knight Rises', 'John Carter', 'Spider-Man 3', 'Tangled', 'Avengers: Age of Ultron', 'Harry Potter and the Half-Blood Prince', 'Batman v Superman: Dawn of Justice', 'Superman Returns', 'Quantum of Solace', "Pirates of the Caribbean: Dead Man's Chest", 'The Lone Ranger', 'Man of Steel', 'The Chronicles of Narnia: Prince Caspian', 'The Avengers', 'Pirates of the Caribbean: On Stranger Tides', 'Men in Black 3', 'The Hobbit: The Battle of the Five Armies', 'The Amazing Spider-Man', 'Robin Hood', 'The Hobbit: The Desolation of Smaug', 'The Golden Compass', 'King Kong', 'Titanic', 'Captain America: Civil War', 'Battleship', 'Jurassic World', 'Skyfall', 'Spider-Man 2', 'Iron Man 3', 'Alice in Wonderland', 'X-Men: The Last Stand', 'Monsters University', 'Transformers: Revenge of the Fallen', 'Transformers: Age of Extinction', 'Oz: The Great and Powerful', 'The Amazing Spider-Man 2', 'TRON: Legacy', 'Cars 2', 'Green Lant

In [None]:
# Resolve the user's input to a title that exists in the dataset.
# Matching is done on lower-cased text: with case-sensitive matching the
# input "ironman" ranked 'Birdman' above 'Iron Man', which is why the second
# hit used to be picked through a hard-coded index.
titles_by_lowercase = {}
for known_title in movie_list:
  # setdefault keeps the first spelling when a title occurs more than once
  titles_by_lowercase.setdefault(known_title.lower(), known_title)

find_close_match = [
  titles_by_lowercase[key]
  for key in difflib.get_close_matches(movie_name.strip().lower(), list(titles_by_lowercase))
]

print(find_close_match)

# Fail with a clear message instead of an IndexError on an empty result.
if not find_close_match:
  raise ValueError("No movie title close to " + repr(movie_name) + " was found")

# Candidates are ordered best match first, so take the first one.
close_match = find_close_match[0]
print(close_match)

['Birdman', 'Iron Man', 'Hitman']
Iron Man


In [None]:
# Look up the value of the 'index' column for the first row whose title
# equals the matched title.
title_mask = movies_data['title'] == close_match
movie_index = movies_data.loc[title_mask, 'index'].iloc[0]

print(movie_index)

68


In [None]:
# Pair every position in the chosen movie's similarity row with its score,
# producing a list of (position, score) tuples.
row_scores = similarity[movie_index]
similarity_score = [(position, score) for position, score in enumerate(row_scores)]

print(similarity_score)

[(0, 0.05016748136669368), (1, 0.036504403551930735), (2, 0.011027273580371763), (3, 0.006565349603578827), (4, 0.026252841357530816), (5, 0.01435093694421185), (6, 0.03283855555197044), (7, 0.1934616198542664), (8, 0.010445813509394912), (9, 0.03661485153710735), (10, 0.03855879505272357), (11, 0.013139190633502726), (12, 0.010479556676668451), (13, 0.010518343331398772), (14, 0.05823429142098432), (15, 0.028503875374706913), (16, 0.13207167142960982), (17, 0.01578090058867321), (18, 0.020779584107972877), (19, 0.04248324989905034), (20, 0.03674703171157746), (21, 0.02448074456031117), (22, 0.009210348492528605), (23, 0.01768586299769241), (24, 0.017916926183076293), (25, 0.003516518632235872), (26, 0.11562987214995633), (27, 0.03304279411965905), (28, 0.03726553554947211), (29, 0.009556897181034033), (30, 0.07968131191098515), (31, 0.2905748049493533), (32, 0.025750144781740727), (33, 0.07769094217953838), (34, 0.005793633621669453), (35, 0.020962008308371926), (36, 0.025302742178995

In [None]:
# Order the (position, score) pairs from most to least similar; the input
# list itself is left untouched because a copy is sorted.
sorted_similar_movies = list(similarity_score)
sorted_similar_movies.sort(key = lambda pair: pair[1], reverse = True)

print(sorted_similar_movies)

[(68, 1.0000000000000002), (79, 0.32931635676622034), (31, 0.2905748049493533), (7, 0.1934616198542664), (16, 0.13207167142960982), (511, 0.11734208117424358), (4401, 0.11644483277827958), (26, 0.11562987214995633), (64, 0.11259716951754226), (182, 0.10817373034603034), (3623, 0.10427067420071678), (94, 0.10006796607350356), (46, 0.08807484414864906), (4033, 0.08653070912487781), (783, 0.08639487611888819), (174, 0.08596256126107997), (618, 0.08421933692282337), (85, 0.08316833754691101), (3133, 0.08003018085859594), (101, 0.07993837325612833), (1740, 0.07984652121291244), (30, 0.07968131191098515), (3466, 0.0786331619502318), (33, 0.07769094217953838), (203, 0.07716735697002322), (1177, 0.07677480919606793), (420, 0.07659840549735361), (131, 0.07525457511078404), (1406, 0.07520649962954992), (318, 0.07469850073570095), (788, 0.0738846493161906), (2229, 0.07373756427186172), (122, 0.07318834027887944), (1135, 0.07303532592126641), (2235, 0.07248656252627006), (138, 0.07245384561995602)

In [None]:
print("Your Movie Recommendations: \n")

# Number of titles to show (the original counter printed while i < 15, i.e. 14).
top_n = 14

# Slice off the best-scoring entries first, so the DataFrame lookup runs only
# for the titles that are printed instead of for every movie in the dataset.
# The list starts with the queried movie itself, as in the original output.
for index, _score in sorted_similar_movies[:top_n]:
  title = movies_data[movies_data.index == index]['title'].values[0]
  print(title)

Your Movie Recommendations: 

Iron Man
Iron Man 2
Iron Man 3
Avengers: Age of Ultron
The Avengers
X-Men
The Helix... Loaded
Captain America: Civil War
X-Men: Apocalypse
Ant-Man
Made
Guardians of the Galaxy
X-Men: Days of Future Past
Super


Movie Recommendation System

In [None]:
# Complete recommender in one cell: read a title, resolve it to a movie of the
# dataset, and print the titles with the highest similarity to it.
movie_name = input("Enter Your Movie: ")

movie_list = movies_data['title'].tolist()

# Match on lower-cased text so that differences in capitalisation do not push
# the intended title behind look-alikes; the dict maps each lower-cased title
# back to its original spelling (first occurrence wins for repeated titles).
titles_by_lowercase = {}
for known_title in movie_list:
  titles_by_lowercase.setdefault(known_title.lower(), known_title)

find_close_match = [
  titles_by_lowercase[key]
  for key in difflib.get_close_matches(movie_name.strip().lower(), list(titles_by_lowercase))
]

if not find_close_match:
  # Previously this case crashed with an IndexError on find_close_match[0].
  print("No movie title close to " + repr(movie_name) + " was found.")
else:
  # Candidates are ordered best match first.
  close_match = find_close_match[0]

  # Value of the 'index' column of the first row carrying the matched title;
  # it is used as the row number into the similarity matrix.
  movie_index = movies_data[movies_data.title == close_match]['index'].values[0]

  # (position, score) pairs for that movie, most similar first.
  similarity_score = list(enumerate(similarity[movie_index]))
  sorted_similar_movies = sorted(similarity_score, key = lambda x: x[1], reverse = True)

  print("Your Movie Recommendations: \n")

  # Number of titles to show (the original counter printed while i < 15, i.e. 14).
  top_n = 14

  # Only look up the titles that are actually printed instead of scanning the
  # DataFrame once per movie in the whole dataset.
  for index, _score in sorted_similar_movies[:top_n]:
    title = movies_data[movies_data.index == index]['title'].values[0]
    print(title)

