# Movie Reccomendation System

In [1]:
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
m_data = pd.read_csv("C:\\Users\\Ishika Chandwadkar\\Downloads\\movie_dataset.csv")

In [3]:
m_data.head()

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,...,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,...,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,...,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton


In [4]:
m_data.shape

(4803, 24)

In [5]:
selected_features= ['genres','keywords','tagline','cast','director']

In [6]:
print(selected_features)

['genres', 'keywords', 'tagline', 'cast', 'director']


In [7]:
for feature in selected_features:
    m_data[feature] = m_data[feature].fillna('')

In [8]:
combined_features = m_data['genres']+' '+m_data['keywords']+' '+m_data['tagline']+' '+m_data['cast']+' '+m_data['director']

In [9]:
print(combined_features)

0       Action Adventure Fantasy Science Fiction cultu...
1       Adventure Fantasy Action ocean drug abuse exot...
2       Action Adventure Crime spy based on novel secr...
3       Action Crime Drama Thriller dc comics crime fi...
4       Action Adventure Science Fiction based on nove...
                              ...                        
4798    Action Crime Thriller united states\u2013mexic...
4799    Comedy Romance  A newlywed couple's honeymoon ...
4800    Comedy Drama Romance TV Movie date love at fir...
4801      A New Yorker in Shanghai Daniel Henney Eliza...
4802    Documentary obsession camcorder crush dream gi...
Length: 4803, dtype: object


In [12]:
vectorizer = TfidfVectorizer() #Term Frequency-Inverse Document Frequency Vectorizer." It combines the concepts of term frequency
#(TF) and inverse document frequency (IDF) to calculate a numerical representation of text data.
#It is used for converting a collection of raw documents into a numerical representation suitable for machine learning algorithms.
#1. Term Frequency (TF): It calculates the frequency of each term (word) in a document. The assumption is that the more frequent 
#   a term appears in a document, the more important it is to that document.
#2. Inverse Document Frequency (IDF): It calculates the inverse document frequency for each term. IDF measures how 
#   important a term is across the entire corpus of documents. It assigns higher weights to terms that are less frequent in the entire corpus.
#3. TF-IDF Calculation: The TF-IDF score for a term in a document is calculated as the product of the term's TF and IDF values. 
#   This score reflects the importance of the term in the document relative to the entire corpus.

In [13]:
feature_vectors = vectorizer.fit_transform(combined_features) #It performs two operations: fitting the vectorizer on the training data and transforming the training data into a numerical representation.

In [14]:
similarity = cosine_similarity(feature_vectors) #Cosine similarity is a metric used to measure the similarity between 
# two vectors. It is commonly used in information retrieval, natural language processing, and recommendation systems.

In [15]:
print(similarity)

[[1.         0.07219487 0.037733   ... 0.         0.         0.        ]
 [0.07219487 1.         0.03281499 ... 0.03575545 0.         0.        ]
 [0.037733   0.03281499 1.         ... 0.         0.05389661 0.        ]
 ...
 [0.         0.03575545 0.         ... 1.         0.         0.02651502]
 [0.         0.         0.05389661 ... 0.         1.         0.        ]
 [0.         0.         0.         ... 0.02651502 0.         1.        ]]


In [16]:
print(similarity.shape)

(4803, 4803)


In [17]:
m_name= input('Enter your favourite movie name: ')

Enter your favourite movie name: Avatar


In [18]:
list_of_all_titles = m_data['title'].tolist()

In [19]:
find_close_match = difflib.get_close_matches(m_name, list_of_all_titles)

In [20]:
print(find_close_match)

['Avatar']


In [21]:
close_match = find_close_match[0]
print(close_match)

Avatar


In [22]:
index_of_the_movie = m_data[m_data.title == close_match]['index'].values[0]

In [23]:
print(index_of_the_movie)

0


In [24]:
similarity_score = list(enumerate(similarity[index_of_the_movie]))

In [25]:
len(similarity_score)

4803

In [26]:
sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True)

In [27]:
print('Movies suggested for you: \n')
i=1
for movie in sorted_similar_movies:
    index = movie[0]
    title_from_index = m_data[m_data.index==index]['title'].values[0]
    if i<30:
        print(i, '.',title_from_index)
        i+=1

Movies suggested for you: 

1 . Avatar
2 . Alien
3 . Aliens
4 . Guardians of the Galaxy
5 . Star Trek Beyond
6 . Star Trek Into Darkness
7 . Galaxy Quest
8 . Alien³
9 . Cargo
10 . Trekkies
11 . Gravity
12 . Moonraker
13 . Jason X
14 . Pocahontas
15 . Space Cowboys
16 . The Helix... Loaded
17 . Lockout
18 . Event Horizon
19 . Space Dogs
20 . Machete Kills
21 . Gettysburg
22 . Clash of the Titans
23 . Star Wars: Clone Wars: Volume 1
24 . The Right Stuff
25 . Terminator Salvation
26 . The Astronaut's Wife
27 . Planet of the Apes
28 . Star Trek
29 . Wing Commander


In [31]:
m_name = input('Enter your favourite movie name: ')
list_of_all_titles = m_data['title'].tolist()
find_close_mtch = difflib.get_close_matches(m_name, list_of_all_titles)
close_match = find_close_match[0]
index_of_the_movie = m_data[m_data.title == close_match]['index'].values[0]
similarity_score = list(enumerate(similarity[index_of_the_movie]))
sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True)
print('Movies suggested for you: \n')
i=1
for movie in sorted_similar_movies:
    index = movie[0]
    title_from_index = m_data[m_data.index==index]['title'].values[0]
    if i<30:
        print(i, '.',title_from_index)
        i+=1

Enter your favourite movie name: John Carter
Movies suggested for you: 

1 . Avatar
2 . Alien
3 . Aliens
4 . Guardians of the Galaxy
5 . Star Trek Beyond
6 . Star Trek Into Darkness
7 . Galaxy Quest
8 . Alien³
9 . Cargo
10 . Trekkies
11 . Gravity
12 . Moonraker
13 . Jason X
14 . Pocahontas
15 . Space Cowboys
16 . The Helix... Loaded
17 . Lockout
18 . Event Horizon
19 . Space Dogs
20 . Machete Kills
21 . Gettysburg
22 . Clash of the Titans
23 . Star Wars: Clone Wars: Volume 1
24 . The Right Stuff
25 . Terminator Salvation
26 . The Astronaut's Wife
27 . Planet of the Apes
28 . Star Trek
29 . Wing Commander
