# Movie Recommendation System

### 1. Library Imports

Import all nessary libraries for data manipulation, text processing, similarity computation, and GUI creation.

In [1]:
# Import all the necessary libraries
import pandas as pd
import ast
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import tkinter as tk
from fuzzywuzzy import process

### 2. Data Loading

Load the movie and credits dataset which containinformation about movies and their respective credits.

In [2]:
# Load the movie dataset
movies = pd.read_csv('tmdb_5000_movies.csv')

In [3]:
movies.head(3)

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2009-12-10,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2007-05-19,961000000,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500
2,245000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.sonypictures.com/movies/spectre/,206647,"[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...",en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...","[{""iso_3166_1"": ""GB"", ""name"": ""United Kingdom""...",2015-10-26,880674609,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466


In [4]:
# Load the credits dataset
credit = pd.read_csv('tmdb_5000_credits.csv')

In [5]:
credit.head(3)

Unnamed: 0,movie_id,title,cast,crew
0,19995,Avatar,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,285,Pirates of the Caribbean: At World's End,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,206647,Spectre,"[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."


### 3. Data Merging

Merge the dataset on the 'title' column to combine relevant information into one dataframe.

In [6]:
# Merge the datasets on the title coulmn
movies = movies.merge(credit, on='title')

In [7]:
movies.head(3)

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,movie_id,cast,crew
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...",...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,19995,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...",...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,285,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,245000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.sonypictures.com/movies/spectre/,206647,"[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...",en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...",...,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,206647,"[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."


In [8]:
# Print the structure of the merged dataset
movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4809 entries, 0 to 4808
Data columns (total 23 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   budget                4809 non-null   int64  
 1   genres                4809 non-null   object 
 2   homepage              1713 non-null   object 
 3   id                    4809 non-null   int64  
 4   keywords              4809 non-null   object 
 5   original_language     4809 non-null   object 
 6   original_title        4809 non-null   object 
 7   overview              4806 non-null   object 
 8   popularity            4809 non-null   float64
 9   production_companies  4809 non-null   object 
 10  production_countries  4809 non-null   object 
 11  release_date          4808 non-null   object 
 12  revenue               4809 non-null   int64  
 13  runtime               4807 non-null   float64
 14  spoken_languages      4809 non-null   object 
 15  status               

### 4. Data Selection

Select relevant columns that help describe the movie's content for the recommendation system.

In [9]:
# Select the relevant columns for the recommendation system
movies =  movies[['title','genres','keywords','tagline','cast','crew']]

In [10]:
movies.head(3)

Unnamed: 0,title,genres,keywords,tagline,cast,crew
0,Avatar,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",Enter the World of Pandora.,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,Pirates of the Caribbean: At World's End,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...","[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...","At the end of the world, the adventure begins.","[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,Spectre,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...",A Plan No One Escapes,"[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."


### 5. Missing Values Handling

Handle and apply a function to convert JSON strings in the 'genres' and 'keywords' columns to lists.

In [11]:
# Check for missing values
movies.isnull().sum()

title         0
genres        0
keywords      0
tagline     844
cast          0
crew          0
dtype: int64

In [12]:
# Handle missing values in the 'tagline' column by filling with an empty string
movies['tagline'] = movies['tagline'].fillna('')

In [13]:
# Verify if missing values have been handled
movies.isnull().sum()

title       0
genres      0
keywords    0
tagline     0
cast        0
crew        0
dtype: int64

In [14]:
# Check for any duplicate values
movies.duplicated().sum()

0

### 6. Data Conversion

Define and apply a function to convert JSON strings in the 'genres' and 'keywords' columns to lists.

In [15]:
# Function to convert JSON string to list of names
def convert_json_to_list(json_str):
    if pd.isna(json_str):
        return []
    else:
        return [item['name'] for item in ast.literal_eval(json_str)]
movies['genres'] = movies['genres'].apply(convert_json_to_list)
movies['keywords'] = movies['keywords'].apply(convert_json_to_list)

In [16]:
movies['genres'].head(3)

0    [Action, Adventure, Fantasy, Science Fiction]
1                     [Adventure, Fantasy, Action]
2                       [Action, Adventure, Crime]
Name: genres, dtype: object

In [17]:
movies['keywords'].head(3)

0    [culture clash, future, space war, space colon...
1    [ocean, drug abuse, exotic island, east india ...
2    [spy, based on novel, secret agent, sequel, mi...
Name: keywords, dtype: object

In [18]:
# Convert lists to strings for concatenation
movies['genres'] = movies['genres'].apply(lambda x: ', '.join(x) if isinstance(x, list) else '')
movies['keywords'] = movies['keywords'].apply(lambda x: ', '.join(x) if isinstance(x, list) else '')

### 7. Content Creation

Convert lists to strings and create a 'content' column by concatenating 'tagline', 'genres', and 'keywords'.

In [19]:
# Create a 'content' column by concatenting 'tagline', 'genres', and 'keywords'
movies['content'] = movies['tagline'] + ' ' + movies['genres'] + ' ' + movies['keywords']
movies['content'].head(3)

0    Enter the World of Pandora. Action, Adventure,...
1    At the end of the world, the adventure begins....
2    A Plan No One Escapes Action, Adventure, Crime...
Name: content, dtype: object

### 8. TF-IDF Matrix

Create a TF-IDF matrix from the 'content' column to vectorize the text data for similarity computation.

In [20]:
# Create a TF-IDF matrix from the 'content' column
tfidf = TfidfVectorizer(stop_words = 'english')
tfidf_matrix = tfidf.fit_transform(movies['content'])

### 9. Cosine Similarity
Compute cosine similarity matrix from the TF-IDF matrix to find similarities between movies.

In [21]:
# Compute cosine similarity matrix from the TF-IDF matrix
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

### 10. Recommendation Function
Define a function to get movie recommendations based on an input title, utilizing cosine similarity.

In [22]:
# Function to get movie recommendations based on the input title

def get_recommendation(title,cosine_sim=cosine_sim):
    # Convert the title to lowercase
    title = title.lower()
    
    # Check if the title exists in the dataset
    if title in movies['title'].str.lower().values:
        idx = movies[movies['title'].str.lower() == title].index
        if len(idx) ==0:
            return "No matching title found"
        else:
            idx =idx[0]
            # Get similarity scores for the movie
            sim_scores=list(enumerate(cosine_sim[idx]))
            # Sort the movies based on similarity scores
            sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
            # Get the indices of the top 10 most similar movies
            sim_scores = sim_scores[1:11]
            movie_indices =[i[0] for i in sim_scores]
            # Return the titles of the recommended movies
            return movies['title'].iloc[movie_indices].tolist()
    else:
        # If title not found, use fuzzy matching to suggest similar titles
        similar_titles = process.extract(title,movies['title'].str.lower(),limit=1)
        if similar_titles:
            return ["Did you mean {}? If yes, here are some recommendations:".format(similar_titles[0][0])] + get_recommendation(similar_titles[0][0])
        else:
            return "No matching title found"

### 11. Event Handling

Handle the submit button click event in the GUI to display movie recommendations.

In [23]:
# Function to handle the submit button click event
def on_submit():
    title = entry.get()
    result_label.config(text=f'Recommended movies for {title}:')
    recommendations = get_recommendation(title)
    result_text.delete(1.0, tk.END)
    result_text.insert(tk.END,'\n'.join(recommendations))

### 12. GUI Creation

Create a simple GUI using Tkinter to allow users to input a movie title and get recommendations, then run the Tkinter main loop to display the GUI.

In [None]:
# Create the GUI
root = tk.Tk()
root.title('Movie Recommender System')

# Label for movie title entry
label = tk.Label(root,text='Enter a movie title:')
label.pack()

# Entry widget to input the movie title
entry = tk.Entry(root,width=50)
entry.pack()

# Button to submit the movie title and get recommendations
submit_button = tk.Button(root,text='Get Recommendations', command=on_submit)
submit_button.pack()

# Label to display the result
result_label = tk.Label(root,text='')
result_label.pack()

# Text widget to display the list of recommended movies
result_text = tk.Text(root, height=11, width=60)
result_text.pack()

# Run the Tkinter main loop
root.mainloop()