In [54]:
import warnings
warnings.simplefilter("ignore")

In [55]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
from sklearn.model_selection import train_test_split
from sklearn.dummy import DummyRegressor
from sklearn.model_selection import KFold, cross_val_score
from sklearn import metrics 
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics.pairwise import cosine_similarity

In [56]:
movies = pd.read_csv('/kaggle/input/9000-movies-dataset/mymoviedb.csv', lineterminator='\n')
movies

Unnamed: 0,Release_Date,Title,Overview,Popularity,Vote_Count,Vote_Average,Original_Language,Genre,Poster_Url
0,2021-12-15,Spider-Man: No Way Home,Peter Parker is unmasked and no longer able to...,5083.954,8940,8.3,en,"Action, Adventure, Science Fiction",https://image.tmdb.org/t/p/original/1g0dhYtq4i...
1,2022-03-01,The Batman,"In his second year of fighting crime, Batman u...",3827.658,1151,8.1,en,"Crime, Mystery, Thriller",https://image.tmdb.org/t/p/original/74xTEgt7R3...
2,2022-02-25,No Exit,Stranded at a rest stop in the mountains durin...,2618.087,122,6.3,en,Thriller,https://image.tmdb.org/t/p/original/vDHsLnOWKl...
3,2021-11-24,Encanto,"The tale of an extraordinary family, the Madri...",2402.201,5076,7.7,en,"Animation, Comedy, Family, Fantasy",https://image.tmdb.org/t/p/original/4j0PNHkMr5...
4,2021-12-22,The King's Man,As a collection of history's worst tyrants and...,1895.511,1793,7.0,en,"Action, Adventure, Thriller, War",https://image.tmdb.org/t/p/original/aq4Pwv5Xeu...
...,...,...,...,...,...,...,...,...,...
9822,1973-10-15,Badlands,A dramatization of the Starkweather-Fugate kil...,13.357,896,7.6,en,"Drama, Crime",https://image.tmdb.org/t/p/original/z81rBzHNgi...
9823,2020-10-01,Violent Delights,A female vampire falls in love with a man she ...,13.356,8,3.5,es,Horror,https://image.tmdb.org/t/p/original/4b6HY7rud6...
9824,2016-05-06,The Offering,When young and successful reporter Jamie finds...,13.355,94,5.0,en,"Mystery, Thriller, Horror",https://image.tmdb.org/t/p/original/h4uMM1wOhz...
9825,2021-03-31,The United States vs. Billie Holiday,Billie Holiday spent much of her career being ...,13.354,152,6.7,en,"Music, Drama, History",https://image.tmdb.org/t/p/original/vEzkxuE2sJ...


In [57]:
movies.columns

Index(['Release_Date', 'Title', 'Overview', 'Popularity', 'Vote_Count',
       'Vote_Average', 'Original_Language', 'Genre', 'Poster_Url'],
      dtype='object')

In [58]:
movies.isna().sum()

Release_Date         0
Title                0
Overview             0
Popularity           0
Vote_Count           0
Vote_Average         0
Original_Language    0
Genre                0
Poster_Url           0
dtype: int64

In [59]:
def extract_years(df, col):
    df[col] = pd.to_datetime(df[col], errors='coerce')
    df['Release_Year'] = df[col].dt.year
    df.drop(columns=['Release_Date'], inplace=True)
    return df

In [60]:
movies = extract_years(movies, 'Release_Date')
movies

Unnamed: 0,Title,Overview,Popularity,Vote_Count,Vote_Average,Original_Language,Genre,Poster_Url,Release_Year
0,Spider-Man: No Way Home,Peter Parker is unmasked and no longer able to...,5083.954,8940,8.3,en,"Action, Adventure, Science Fiction",https://image.tmdb.org/t/p/original/1g0dhYtq4i...,2021
1,The Batman,"In his second year of fighting crime, Batman u...",3827.658,1151,8.1,en,"Crime, Mystery, Thriller",https://image.tmdb.org/t/p/original/74xTEgt7R3...,2022
2,No Exit,Stranded at a rest stop in the mountains durin...,2618.087,122,6.3,en,Thriller,https://image.tmdb.org/t/p/original/vDHsLnOWKl...,2022
3,Encanto,"The tale of an extraordinary family, the Madri...",2402.201,5076,7.7,en,"Animation, Comedy, Family, Fantasy",https://image.tmdb.org/t/p/original/4j0PNHkMr5...,2021
4,The King's Man,As a collection of history's worst tyrants and...,1895.511,1793,7.0,en,"Action, Adventure, Thriller, War",https://image.tmdb.org/t/p/original/aq4Pwv5Xeu...,2021
...,...,...,...,...,...,...,...,...,...
9822,Badlands,A dramatization of the Starkweather-Fugate kil...,13.357,896,7.6,en,"Drama, Crime",https://image.tmdb.org/t/p/original/z81rBzHNgi...,1973
9823,Violent Delights,A female vampire falls in love with a man she ...,13.356,8,3.5,es,Horror,https://image.tmdb.org/t/p/original/4b6HY7rud6...,2020
9824,The Offering,When young and successful reporter Jamie finds...,13.355,94,5.0,en,"Mystery, Thriller, Horror",https://image.tmdb.org/t/p/original/h4uMM1wOhz...,2016
9825,The United States vs. Billie Holiday,Billie Holiday spent much of her career being ...,13.354,152,6.7,en,"Music, Drama, History",https://image.tmdb.org/t/p/original/vEzkxuE2sJ...,2021


In [61]:
movies.describe()

Unnamed: 0,Popularity,Vote_Count,Vote_Average,Release_Year
count,9827.0,9827.0,9827.0,9827.0
mean,40.326088,1392.805536,6.439534,2006.203623
std,108.873998,2611.206907,1.129759,15.685554
min,13.354,0.0,0.0,1902.0
25%,16.1285,146.0,5.9,2000.0
50%,21.199,444.0,6.5,2011.0
75%,35.1915,1376.0,7.1,2017.0
max,5083.954,31077.0,10.0,2024.0


In [64]:
def categorize_popularity(df):
    bins = [0, 200, 500, 700, df['Popularity'].max()]
    labels = ['Low', 'Medium', 'High', 'Very High']
    df['Popularity_'] = pd.cut(df['Popularity'], bins = bins, labels = labels, include_lowest=True)
    df.drop(columns=['Popularity'], inplace=True)
    return df

In [65]:
movies = categorize_popularity(movies)
movies

Unnamed: 0,Title,Overview,Vote_Count,Vote_Average,Original_Language,Genre,Poster_Url,Release_Year,Popularity_
0,Spider-Man: No Way Home,Peter Parker is unmasked and no longer able to...,8940,8.3,en,"Action, Adventure, Science Fiction",https://image.tmdb.org/t/p/original/1g0dhYtq4i...,2021,Very High
1,The Batman,"In his second year of fighting crime, Batman u...",1151,8.1,en,"Crime, Mystery, Thriller",https://image.tmdb.org/t/p/original/74xTEgt7R3...,2022,Very High
2,No Exit,Stranded at a rest stop in the mountains durin...,122,6.3,en,Thriller,https://image.tmdb.org/t/p/original/vDHsLnOWKl...,2022,Very High
3,Encanto,"The tale of an extraordinary family, the Madri...",5076,7.7,en,"Animation, Comedy, Family, Fantasy",https://image.tmdb.org/t/p/original/4j0PNHkMr5...,2021,Very High
4,The King's Man,As a collection of history's worst tyrants and...,1793,7.0,en,"Action, Adventure, Thriller, War",https://image.tmdb.org/t/p/original/aq4Pwv5Xeu...,2021,Very High
...,...,...,...,...,...,...,...,...,...
9822,Badlands,A dramatization of the Starkweather-Fugate kil...,896,7.6,en,"Drama, Crime",https://image.tmdb.org/t/p/original/z81rBzHNgi...,1973,Low
9823,Violent Delights,A female vampire falls in love with a man she ...,8,3.5,es,Horror,https://image.tmdb.org/t/p/original/4b6HY7rud6...,2020,Low
9824,The Offering,When young and successful reporter Jamie finds...,94,5.0,en,"Mystery, Thriller, Horror",https://image.tmdb.org/t/p/original/h4uMM1wOhz...,2016,Low
9825,The United States vs. Billie Holiday,Billie Holiday spent much of her career being ...,152,6.7,en,"Music, Drama, History",https://image.tmdb.org/t/p/original/vEzkxuE2sJ...,2021,Low


In [66]:
movies.drop(columns=['Vote_Count'], inplace=True)

In [67]:
movies

Unnamed: 0,Title,Overview,Vote_Average,Original_Language,Genre,Poster_Url,Release_Year,Popularity_
0,Spider-Man: No Way Home,Peter Parker is unmasked and no longer able to...,8.3,en,"Action, Adventure, Science Fiction",https://image.tmdb.org/t/p/original/1g0dhYtq4i...,2021,Very High
1,The Batman,"In his second year of fighting crime, Batman u...",8.1,en,"Crime, Mystery, Thriller",https://image.tmdb.org/t/p/original/74xTEgt7R3...,2022,Very High
2,No Exit,Stranded at a rest stop in the mountains durin...,6.3,en,Thriller,https://image.tmdb.org/t/p/original/vDHsLnOWKl...,2022,Very High
3,Encanto,"The tale of an extraordinary family, the Madri...",7.7,en,"Animation, Comedy, Family, Fantasy",https://image.tmdb.org/t/p/original/4j0PNHkMr5...,2021,Very High
4,The King's Man,As a collection of history's worst tyrants and...,7.0,en,"Action, Adventure, Thriller, War",https://image.tmdb.org/t/p/original/aq4Pwv5Xeu...,2021,Very High
...,...,...,...,...,...,...,...,...
9822,Badlands,A dramatization of the Starkweather-Fugate kil...,7.6,en,"Drama, Crime",https://image.tmdb.org/t/p/original/z81rBzHNgi...,1973,Low
9823,Violent Delights,A female vampire falls in love with a man she ...,3.5,es,Horror,https://image.tmdb.org/t/p/original/4b6HY7rud6...,2020,Low
9824,The Offering,When young and successful reporter Jamie finds...,5.0,en,"Mystery, Thriller, Horror",https://image.tmdb.org/t/p/original/h4uMM1wOhz...,2016,Low
9825,The United States vs. Billie Holiday,Billie Holiday spent much of her career being ...,6.7,en,"Music, Drama, History",https://image.tmdb.org/t/p/original/vEzkxuE2sJ...,2021,Low


In [68]:
movies['Original_Language'].unique()

array(['en', 'ja', 'fr', 'hi', 'es', 'ru', 'de', 'th', 'ko', 'tr', 'cn',
       'zh', 'it', 'pt', 'ml', 'pl', 'fi', 'no', 'da', 'id', 'sv', 'nl',
       'te', 'sr', 'is', 'ro', 'tl', 'fa', 'uk', 'nb', 'eu', 'lv', 'ar',
       'el', 'cs', 'ms', 'bn', 'ca', 'la', 'ta', 'hu', 'he', 'et'],
      dtype=object)

In [69]:
def map_language_codes_to_full_names(df):
    language_map = {
        'en': 'English', 'ja': 'Japanese', 'fr': 'French', 'hi': 'Hindi', 'es': 'Spanish',
        'ru': 'Russian', 'de': 'German', 'th': 'Thai', 'ko': 'Korean', 'tr': 'Turkish',
        'cn': 'Chinese', 'zh': 'Chinese', 'it': 'Italian', 'pt': 'Portuguese', 'ml': 'Malayalam',
        'pl': 'Polish', 'fi': 'Finnish', 'no': 'Norwegian', 'da': 'Danish', 'id': 'Indonesian',
        'sv': 'Swedish', 'nl': 'Dutch', 'te': 'Telugu', 'sr': 'Serbian', 'is': 'Icelandic',
        'ro': 'Romanian', 'tl': 'Tagalog', 'fa': 'Persian', 'uk': 'Ukrainian', 'nb': 'Norwegian Bokmål',
        'eu': 'Basque', 'lv': 'Latvian', 'ar': 'Arabic', 'el': 'Greek', 'cs': 'Czech', 'ms': 'Malay',
        'bn': 'Bengali', 'ca': 'Catalan', 'la': 'Latin', 'ta': 'Tamil', 'hu': 'Hungarian', 
        'he': 'Hebrew', 'et': 'Estonian'
    }
    
    df['Original_Language_Full'] = df['Original_Language'].map(language_map)
    df.drop(columns=['Original_Language'], inplace = True)
    
    return df

In [70]:
movies = map_language_codes_to_full_names(movies)
movies

Unnamed: 0,Title,Overview,Vote_Average,Genre,Poster_Url,Release_Year,Popularity_,Original_Language_Full
0,Spider-Man: No Way Home,Peter Parker is unmasked and no longer able to...,8.3,"Action, Adventure, Science Fiction",https://image.tmdb.org/t/p/original/1g0dhYtq4i...,2021,Very High,English
1,The Batman,"In his second year of fighting crime, Batman u...",8.1,"Crime, Mystery, Thriller",https://image.tmdb.org/t/p/original/74xTEgt7R3...,2022,Very High,English
2,No Exit,Stranded at a rest stop in the mountains durin...,6.3,Thriller,https://image.tmdb.org/t/p/original/vDHsLnOWKl...,2022,Very High,English
3,Encanto,"The tale of an extraordinary family, the Madri...",7.7,"Animation, Comedy, Family, Fantasy",https://image.tmdb.org/t/p/original/4j0PNHkMr5...,2021,Very High,English
4,The King's Man,As a collection of history's worst tyrants and...,7.0,"Action, Adventure, Thriller, War",https://image.tmdb.org/t/p/original/aq4Pwv5Xeu...,2021,Very High,English
...,...,...,...,...,...,...,...,...
9822,Badlands,A dramatization of the Starkweather-Fugate kil...,7.6,"Drama, Crime",https://image.tmdb.org/t/p/original/z81rBzHNgi...,1973,Low,English
9823,Violent Delights,A female vampire falls in love with a man she ...,3.5,Horror,https://image.tmdb.org/t/p/original/4b6HY7rud6...,2020,Low,Spanish
9824,The Offering,When young and successful reporter Jamie finds...,5.0,"Mystery, Thriller, Horror",https://image.tmdb.org/t/p/original/h4uMM1wOhz...,2016,Low,English
9825,The United States vs. Billie Holiday,Billie Holiday spent much of her career being ...,6.7,"Music, Drama, History",https://image.tmdb.org/t/p/original/vEzkxuE2sJ...,2021,Low,English


In [72]:
def categorize_year(df):
    bins = [1900, 1910, 1920, 1930, 1940, 1950, 1960, 1970, 1980, 1990, 2000, 2010, 2020, df['Release_Year'].max()]
    labels = [1900, 1910, 1920, 1930, 1940, 1950, 1960, 1970, 1980, 1990, 2000, 2010, 2020]
    df['Release_Era'] = pd.cut(df['Release_Year'], bins = bins, labels = labels, include_lowest=True)
    return df

In [73]:
movies = categorize_year(movies)
movies

Unnamed: 0,Title,Overview,Vote_Average,Genre,Poster_Url,Release_Year,Popularity_,Original_Language_Full,Release_Era
0,Spider-Man: No Way Home,Peter Parker is unmasked and no longer able to...,8.3,"Action, Adventure, Science Fiction",https://image.tmdb.org/t/p/original/1g0dhYtq4i...,2021,Very High,English,2020
1,The Batman,"In his second year of fighting crime, Batman u...",8.1,"Crime, Mystery, Thriller",https://image.tmdb.org/t/p/original/74xTEgt7R3...,2022,Very High,English,2020
2,No Exit,Stranded at a rest stop in the mountains durin...,6.3,Thriller,https://image.tmdb.org/t/p/original/vDHsLnOWKl...,2022,Very High,English,2020
3,Encanto,"The tale of an extraordinary family, the Madri...",7.7,"Animation, Comedy, Family, Fantasy",https://image.tmdb.org/t/p/original/4j0PNHkMr5...,2021,Very High,English,2020
4,The King's Man,As a collection of history's worst tyrants and...,7.0,"Action, Adventure, Thriller, War",https://image.tmdb.org/t/p/original/aq4Pwv5Xeu...,2021,Very High,English,2020
...,...,...,...,...,...,...,...,...,...
9822,Badlands,A dramatization of the Starkweather-Fugate kil...,7.6,"Drama, Crime",https://image.tmdb.org/t/p/original/z81rBzHNgi...,1973,Low,English,1970
9823,Violent Delights,A female vampire falls in love with a man she ...,3.5,Horror,https://image.tmdb.org/t/p/original/4b6HY7rud6...,2020,Low,Spanish,2010
9824,The Offering,When young and successful reporter Jamie finds...,5.0,"Mystery, Thriller, Horror",https://image.tmdb.org/t/p/original/h4uMM1wOhz...,2016,Low,English,2010
9825,The United States vs. Billie Holiday,Billie Holiday spent much of her career being ...,6.7,"Music, Drama, History",https://image.tmdb.org/t/p/original/vEzkxuE2sJ...,2021,Low,English,2020


In [74]:
movies['Title'].value_counts()

Title
Beauty and the Beast                      4
Alice in Wonderland                       4
The Little Mermaid                        3
The Call                                  3
Halloween                                 3
                                         ..
There's Something About Mary              1
Amores Perros                             1
The Human Centipede 3 (Final Sequence)    1
Newness                                   1
Threads                                   1
Name: count, Length: 9513, dtype: int64

In [75]:
movies['Title'].duplicated().sum()

314

In [76]:
movies = movies.drop_duplicates(subset='Title', keep='first')

In [77]:
movies['Title'].duplicated().sum()

0

In [78]:
movies

Unnamed: 0,Title,Overview,Vote_Average,Genre,Poster_Url,Release_Year,Popularity_,Original_Language_Full,Release_Era
0,Spider-Man: No Way Home,Peter Parker is unmasked and no longer able to...,8.3,"Action, Adventure, Science Fiction",https://image.tmdb.org/t/p/original/1g0dhYtq4i...,2021,Very High,English,2020
1,The Batman,"In his second year of fighting crime, Batman u...",8.1,"Crime, Mystery, Thriller",https://image.tmdb.org/t/p/original/74xTEgt7R3...,2022,Very High,English,2020
2,No Exit,Stranded at a rest stop in the mountains durin...,6.3,Thriller,https://image.tmdb.org/t/p/original/vDHsLnOWKl...,2022,Very High,English,2020
3,Encanto,"The tale of an extraordinary family, the Madri...",7.7,"Animation, Comedy, Family, Fantasy",https://image.tmdb.org/t/p/original/4j0PNHkMr5...,2021,Very High,English,2020
4,The King's Man,As a collection of history's worst tyrants and...,7.0,"Action, Adventure, Thriller, War",https://image.tmdb.org/t/p/original/aq4Pwv5Xeu...,2021,Very High,English,2020
...,...,...,...,...,...,...,...,...,...
9822,Badlands,A dramatization of the Starkweather-Fugate kil...,7.6,"Drama, Crime",https://image.tmdb.org/t/p/original/z81rBzHNgi...,1973,Low,English,1970
9823,Violent Delights,A female vampire falls in love with a man she ...,3.5,Horror,https://image.tmdb.org/t/p/original/4b6HY7rud6...,2020,Low,Spanish,2010
9824,The Offering,When young and successful reporter Jamie finds...,5.0,"Mystery, Thriller, Horror",https://image.tmdb.org/t/p/original/h4uMM1wOhz...,2016,Low,English,2010
9825,The United States vs. Billie Holiday,Billie Holiday spent much of her career being ...,6.7,"Music, Drama, History",https://image.tmdb.org/t/p/original/vEzkxuE2sJ...,2021,Low,English,2020


In [79]:
movies['Title'].value_counts()

Title
Spider-Man: No Way Home                               1
Phineas and Ferb: Mission Marvel                      1
A Brighter Summer Day                                 1
Killers Anonymous                                     1
I.T.                                                  1
                                                     ..
Inuyasha the Movie 3: Swords of an Honorable Ruler    1
Remember the Titans                                   1
Leroy & Stitch                                        1
Bingo Hell                                            1
Threads                                               1
Name: count, Length: 9513, dtype: int64

In [80]:
label_encoder = LabelEncoder()
movies['Genre_Encoded'] = label_encoder.fit_transform(movies['Genre'])

In [81]:
movies

Unnamed: 0,Title,Overview,Vote_Average,Genre,Poster_Url,Release_Year,Popularity_,Original_Language_Full,Release_Era,Genre_Encoded
0,Spider-Man: No Way Home,Peter Parker is unmasked and no longer able to...,8.3,"Action, Adventure, Science Fiction",https://image.tmdb.org/t/p/original/1g0dhYtq4i...,2021,Very High,English,2020,93
1,The Batman,"In his second year of fighting crime, Batman u...",8.1,"Crime, Mystery, Thriller",https://image.tmdb.org/t/p/original/74xTEgt7R3...,2022,Very High,English,2020,1132
2,No Exit,Stranded at a rest stop in the mountains durin...,6.3,Thriller,https://image.tmdb.org/t/p/original/vDHsLnOWKl...,2022,Very High,English,2020,2112
3,Encanto,"The tale of an extraordinary family, the Madri...",7.7,"Animation, Comedy, Family, Fantasy",https://image.tmdb.org/t/p/original/4j0PNHkMr5...,2021,Very High,English,2020,689
4,The King's Man,As a collection of history's worst tyrants and...,7.0,"Action, Adventure, Thriller, War",https://image.tmdb.org/t/p/original/aq4Pwv5Xeu...,2021,Very High,English,2020,110
...,...,...,...,...,...,...,...,...,...,...
9822,Badlands,A dramatization of the Starkweather-Fugate kil...,7.6,"Drama, Crime",https://image.tmdb.org/t/p/original/z81rBzHNgi...,1973,Low,English,1970,1227
9823,Violent Delights,A female vampire falls in love with a man she ...,3.5,Horror,https://image.tmdb.org/t/p/original/4b6HY7rud6...,2020,Low,Spanish,2010,1709
9824,The Offering,When young and successful reporter Jamie finds...,5.0,"Mystery, Thriller, Horror",https://image.tmdb.org/t/p/original/h4uMM1wOhz...,2016,Low,English,2010,1902
9825,The United States vs. Billie Holiday,Billie Holiday spent much of her career being ...,6.7,"Music, Drama, History",https://image.tmdb.org/t/p/original/vEzkxuE2sJ...,2021,Low,English,2020,1843


In [82]:
movies['Title_Encoded'] = label_encoder.fit_transform(movies['Title'])
movies

Unnamed: 0,Title,Overview,Vote_Average,Genre,Poster_Url,Release_Year,Popularity_,Original_Language_Full,Release_Era,Genre_Encoded,Title_Encoded
0,Spider-Man: No Way Home,Peter Parker is unmasked and no longer able to...,8.3,"Action, Adventure, Science Fiction",https://image.tmdb.org/t/p/original/1g0dhYtq4i...,2021,Very High,English,2020,93,6612
1,The Batman,"In his second year of fighting crime, Batman u...",8.1,"Crime, Mystery, Thriller",https://image.tmdb.org/t/p/original/74xTEgt7R3...,2022,Very High,English,2020,1132,7098
2,No Exit,Stranded at a rest stop in the mountains durin...,6.3,Thriller,https://image.tmdb.org/t/p/original/vDHsLnOWKl...,2022,Very High,English,2020,2112,5110
3,Encanto,"The tale of an extraordinary family, the Madri...",7.7,"Animation, Comedy, Family, Fantasy",https://image.tmdb.org/t/p/original/4j0PNHkMr5...,2021,Very High,English,2020,689,2467
4,The King's Man,As a collection of history's worst tyrants and...,7.0,"Action, Adventure, Thriller, War",https://image.tmdb.org/t/p/original/aq4Pwv5Xeu...,2021,Very High,English,2020,110,7795
...,...,...,...,...,...,...,...,...,...,...,...
9822,Badlands,A dramatization of the Starkweather-Fugate kil...,7.6,"Drama, Crime",https://image.tmdb.org/t/p/original/z81rBzHNgi...,1973,Low,English,1970,1227,860
9823,Violent Delights,A female vampire falls in love with a man she ...,3.5,Horror,https://image.tmdb.org/t/p/original/4b6HY7rud6...,2020,Low,Spanish,2010,1709,9124
9824,The Offering,When young and successful reporter Jamie finds...,5.0,"Mystery, Thriller, Horror",https://image.tmdb.org/t/p/original/h4uMM1wOhz...,2016,Low,English,2010,1902,8112
9825,The United States vs. Billie Holiday,Billie Holiday spent much of her career being ...,6.7,"Music, Drama, History",https://image.tmdb.org/t/p/original/vEzkxuE2sJ...,2021,Low,English,2020,1843,8555


In [83]:
movies['Popularity_Encoded'] = label_encoder.fit_transform(movies['Popularity_'])
movies

Unnamed: 0,Title,Overview,Vote_Average,Genre,Poster_Url,Release_Year,Popularity_,Original_Language_Full,Release_Era,Genre_Encoded,Title_Encoded,Popularity_Encoded
0,Spider-Man: No Way Home,Peter Parker is unmasked and no longer able to...,8.3,"Action, Adventure, Science Fiction",https://image.tmdb.org/t/p/original/1g0dhYtq4i...,2021,Very High,English,2020,93,6612,3
1,The Batman,"In his second year of fighting crime, Batman u...",8.1,"Crime, Mystery, Thriller",https://image.tmdb.org/t/p/original/74xTEgt7R3...,2022,Very High,English,2020,1132,7098,3
2,No Exit,Stranded at a rest stop in the mountains durin...,6.3,Thriller,https://image.tmdb.org/t/p/original/vDHsLnOWKl...,2022,Very High,English,2020,2112,5110,3
3,Encanto,"The tale of an extraordinary family, the Madri...",7.7,"Animation, Comedy, Family, Fantasy",https://image.tmdb.org/t/p/original/4j0PNHkMr5...,2021,Very High,English,2020,689,2467,3
4,The King's Man,As a collection of history's worst tyrants and...,7.0,"Action, Adventure, Thriller, War",https://image.tmdb.org/t/p/original/aq4Pwv5Xeu...,2021,Very High,English,2020,110,7795,3
...,...,...,...,...,...,...,...,...,...,...,...,...
9822,Badlands,A dramatization of the Starkweather-Fugate kil...,7.6,"Drama, Crime",https://image.tmdb.org/t/p/original/z81rBzHNgi...,1973,Low,English,1970,1227,860,1
9823,Violent Delights,A female vampire falls in love with a man she ...,3.5,Horror,https://image.tmdb.org/t/p/original/4b6HY7rud6...,2020,Low,Spanish,2010,1709,9124,1
9824,The Offering,When young and successful reporter Jamie finds...,5.0,"Mystery, Thriller, Horror",https://image.tmdb.org/t/p/original/h4uMM1wOhz...,2016,Low,English,2010,1902,8112,1
9825,The United States vs. Billie Holiday,Billie Holiday spent much of her career being ...,6.7,"Music, Drama, History",https://image.tmdb.org/t/p/original/vEzkxuE2sJ...,2021,Low,English,2020,1843,8555,1


In [84]:
movies['Language_Encoded'] = label_encoder.fit_transform(movies['Original_Language_Full'])
movies

Unnamed: 0,Title,Overview,Vote_Average,Genre,Poster_Url,Release_Year,Popularity_,Original_Language_Full,Release_Era,Genre_Encoded,Title_Encoded,Popularity_Encoded,Language_Encoded
0,Spider-Man: No Way Home,Peter Parker is unmasked and no longer able to...,8.3,"Action, Adventure, Science Fiction",https://image.tmdb.org/t/p/original/1g0dhYtq4i...,2021,Very High,English,2020,93,6612,3,8
1,The Batman,"In his second year of fighting crime, Batman u...",8.1,"Crime, Mystery, Thriller",https://image.tmdb.org/t/p/original/74xTEgt7R3...,2022,Very High,English,2020,1132,7098,3,8
2,No Exit,Stranded at a rest stop in the mountains durin...,6.3,Thriller,https://image.tmdb.org/t/p/original/vDHsLnOWKl...,2022,Very High,English,2020,2112,5110,3,8
3,Encanto,"The tale of an extraordinary family, the Madri...",7.7,"Animation, Comedy, Family, Fantasy",https://image.tmdb.org/t/p/original/4j0PNHkMr5...,2021,Very High,English,2020,689,2467,3,8
4,The King's Man,As a collection of history's worst tyrants and...,7.0,"Action, Adventure, Thriller, War",https://image.tmdb.org/t/p/original/aq4Pwv5Xeu...,2021,Very High,English,2020,110,7795,3,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9822,Badlands,A dramatization of the Starkweather-Fugate kil...,7.6,"Drama, Crime",https://image.tmdb.org/t/p/original/z81rBzHNgi...,1973,Low,English,1970,1227,860,1,8
9823,Violent Delights,A female vampire falls in love with a man she ...,3.5,Horror,https://image.tmdb.org/t/p/original/4b6HY7rud6...,2020,Low,Spanish,2010,1709,9124,1,34
9824,The Offering,When young and successful reporter Jamie finds...,5.0,"Mystery, Thriller, Horror",https://image.tmdb.org/t/p/original/h4uMM1wOhz...,2016,Low,English,2010,1902,8112,1,8
9825,The United States vs. Billie Holiday,Billie Holiday spent much of her career being ...,6.7,"Music, Drama, History",https://image.tmdb.org/t/p/original/vEzkxuE2sJ...,2021,Low,English,2020,1843,8555,1,8


In [85]:
original = movies['Genre'].unique()
unique = []
for genre in original:
    if genre.find(","):
        unique += genre.split(", ")
    else:
        unique.append(genre)
        
un = set(unique)
un

{'Action',
 'Adventure',
 'Animation',
 'Comedy',
 'Crime',
 'Documentary',
 'Drama',
 'Family',
 'Fantasy',
 'History',
 'Horror',
 'Music',
 'Mystery',
 'Romance',
 'Science Fiction',
 'TV Movie',
 'Thriller',
 'War',
 'Western'}

In [86]:
movies.Genre.unique()

array(['Action, Adventure, Science Fiction', 'Crime, Mystery, Thriller',
       'Thriller', ..., 'Crime, Drama, Romance, Thriller, Mystery',
       'Comedy, TV Movie, Romance', 'War, Drama, Science Fiction'],
      dtype=object)

In [87]:
movies['Genre_First_Word'] = movies['Genre'].str.split().str[0].str.replace(r'[^\w\s]', '', regex=True)
movies

Unnamed: 0,Title,Overview,Vote_Average,Genre,Poster_Url,Release_Year,Popularity_,Original_Language_Full,Release_Era,Genre_Encoded,Title_Encoded,Popularity_Encoded,Language_Encoded,Genre_First_Word
0,Spider-Man: No Way Home,Peter Parker is unmasked and no longer able to...,8.3,"Action, Adventure, Science Fiction",https://image.tmdb.org/t/p/original/1g0dhYtq4i...,2021,Very High,English,2020,93,6612,3,8,Action
1,The Batman,"In his second year of fighting crime, Batman u...",8.1,"Crime, Mystery, Thriller",https://image.tmdb.org/t/p/original/74xTEgt7R3...,2022,Very High,English,2020,1132,7098,3,8,Crime
2,No Exit,Stranded at a rest stop in the mountains durin...,6.3,Thriller,https://image.tmdb.org/t/p/original/vDHsLnOWKl...,2022,Very High,English,2020,2112,5110,3,8,Thriller
3,Encanto,"The tale of an extraordinary family, the Madri...",7.7,"Animation, Comedy, Family, Fantasy",https://image.tmdb.org/t/p/original/4j0PNHkMr5...,2021,Very High,English,2020,689,2467,3,8,Animation
4,The King's Man,As a collection of history's worst tyrants and...,7.0,"Action, Adventure, Thriller, War",https://image.tmdb.org/t/p/original/aq4Pwv5Xeu...,2021,Very High,English,2020,110,7795,3,8,Action
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9822,Badlands,A dramatization of the Starkweather-Fugate kil...,7.6,"Drama, Crime",https://image.tmdb.org/t/p/original/z81rBzHNgi...,1973,Low,English,1970,1227,860,1,8,Drama
9823,Violent Delights,A female vampire falls in love with a man she ...,3.5,Horror,https://image.tmdb.org/t/p/original/4b6HY7rud6...,2020,Low,Spanish,2010,1709,9124,1,34,Horror
9824,The Offering,When young and successful reporter Jamie finds...,5.0,"Mystery, Thriller, Horror",https://image.tmdb.org/t/p/original/h4uMM1wOhz...,2016,Low,English,2010,1902,8112,1,8,Mystery
9825,The United States vs. Billie Holiday,Billie Holiday spent much of her career being ...,6.7,"Music, Drama, History",https://image.tmdb.org/t/p/original/vEzkxuE2sJ...,2021,Low,English,2020,1843,8555,1,8,Music


In [88]:
movies.Genre_First_Word.unique()

array(['Action', 'Crime', 'Thriller', 'Animation', 'Horror', 'Science',
       'Fantasy', 'Romance', 'Drama', 'Western', 'Family', 'Comedy',
       'Adventure', 'Mystery', 'TV', 'Documentary', 'War', 'Music',
       'History'], dtype=object)

In [89]:
movies['Genre_Encoded'] = label_encoder.fit_transform(movies['Genre_First_Word'])
movies

Unnamed: 0,Title,Overview,Vote_Average,Genre,Poster_Url,Release_Year,Popularity_,Original_Language_Full,Release_Era,Genre_Encoded,Title_Encoded,Popularity_Encoded,Language_Encoded,Genre_First_Word
0,Spider-Man: No Way Home,Peter Parker is unmasked and no longer able to...,8.3,"Action, Adventure, Science Fiction",https://image.tmdb.org/t/p/original/1g0dhYtq4i...,2021,Very High,English,2020,0,6612,3,8,Action
1,The Batman,"In his second year of fighting crime, Batman u...",8.1,"Crime, Mystery, Thriller",https://image.tmdb.org/t/p/original/74xTEgt7R3...,2022,Very High,English,2020,4,7098,3,8,Crime
2,No Exit,Stranded at a rest stop in the mountains durin...,6.3,Thriller,https://image.tmdb.org/t/p/original/vDHsLnOWKl...,2022,Very High,English,2020,16,5110,3,8,Thriller
3,Encanto,"The tale of an extraordinary family, the Madri...",7.7,"Animation, Comedy, Family, Fantasy",https://image.tmdb.org/t/p/original/4j0PNHkMr5...,2021,Very High,English,2020,2,2467,3,8,Animation
4,The King's Man,As a collection of history's worst tyrants and...,7.0,"Action, Adventure, Thriller, War",https://image.tmdb.org/t/p/original/aq4Pwv5Xeu...,2021,Very High,English,2020,0,7795,3,8,Action
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9822,Badlands,A dramatization of the Starkweather-Fugate kil...,7.6,"Drama, Crime",https://image.tmdb.org/t/p/original/z81rBzHNgi...,1973,Low,English,1970,6,860,1,8,Drama
9823,Violent Delights,A female vampire falls in love with a man she ...,3.5,Horror,https://image.tmdb.org/t/p/original/4b6HY7rud6...,2020,Low,Spanish,2010,10,9124,1,34,Horror
9824,The Offering,When young and successful reporter Jamie finds...,5.0,"Mystery, Thriller, Horror",https://image.tmdb.org/t/p/original/h4uMM1wOhz...,2016,Low,English,2010,12,8112,1,8,Mystery
9825,The United States vs. Billie Holiday,Billie Holiday spent much of her career being ...,6.7,"Music, Drama, History",https://image.tmdb.org/t/p/original/vEzkxuE2sJ...,2021,Low,English,2020,11,8555,1,8,Music


In [90]:
movies['Release_Era'] = movies['Release_Era'].astype(int)
movies.Release_Era

0       2020
1       2020
2       2020
3       2020
4       2020
        ... 
9822    1970
9823    2010
9824    2010
9825    2020
9826    1980
Name: Release_Era, Length: 9513, dtype: int64

In [91]:
movies

Unnamed: 0,Title,Overview,Vote_Average,Genre,Poster_Url,Release_Year,Popularity_,Original_Language_Full,Release_Era,Genre_Encoded,Title_Encoded,Popularity_Encoded,Language_Encoded,Genre_First_Word
0,Spider-Man: No Way Home,Peter Parker is unmasked and no longer able to...,8.3,"Action, Adventure, Science Fiction",https://image.tmdb.org/t/p/original/1g0dhYtq4i...,2021,Very High,English,2020,0,6612,3,8,Action
1,The Batman,"In his second year of fighting crime, Batman u...",8.1,"Crime, Mystery, Thriller",https://image.tmdb.org/t/p/original/74xTEgt7R3...,2022,Very High,English,2020,4,7098,3,8,Crime
2,No Exit,Stranded at a rest stop in the mountains durin...,6.3,Thriller,https://image.tmdb.org/t/p/original/vDHsLnOWKl...,2022,Very High,English,2020,16,5110,3,8,Thriller
3,Encanto,"The tale of an extraordinary family, the Madri...",7.7,"Animation, Comedy, Family, Fantasy",https://image.tmdb.org/t/p/original/4j0PNHkMr5...,2021,Very High,English,2020,2,2467,3,8,Animation
4,The King's Man,As a collection of history's worst tyrants and...,7.0,"Action, Adventure, Thriller, War",https://image.tmdb.org/t/p/original/aq4Pwv5Xeu...,2021,Very High,English,2020,0,7795,3,8,Action
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9822,Badlands,A dramatization of the Starkweather-Fugate kil...,7.6,"Drama, Crime",https://image.tmdb.org/t/p/original/z81rBzHNgi...,1973,Low,English,1970,6,860,1,8,Drama
9823,Violent Delights,A female vampire falls in love with a man she ...,3.5,Horror,https://image.tmdb.org/t/p/original/4b6HY7rud6...,2020,Low,Spanish,2010,10,9124,1,34,Horror
9824,The Offering,When young and successful reporter Jamie finds...,5.0,"Mystery, Thriller, Horror",https://image.tmdb.org/t/p/original/h4uMM1wOhz...,2016,Low,English,2010,12,8112,1,8,Mystery
9825,The United States vs. Billie Holiday,Billie Holiday spent much of her career being ...,6.7,"Music, Drama, History",https://image.tmdb.org/t/p/original/vEzkxuE2sJ...,2021,Low,English,2020,11,8555,1,8,Music


In [92]:
X = movies[['Genre_Encoded', 'Popularity_Encoded', 'Language_Encoded', 'Release_Era']]

similarity_matrix = cosine_similarity(X)

def recommend_movies(movie_title_encoded, similarity_matrix, movie_titles, top_n=5):
    # Find the index of the encoded movie title
    movie_idx = np.where(movie_titles == movie_title_encoded)[0][0]
    
    # Get the similarity scores for the specified movie
    similar_movies = list(enumerate(similarity_matrix[movie_idx]))
    
    # Sort the movies based on similarity score, descending
    similar_movies = sorted(similar_movies, key=lambda x: x[1], reverse=True)
    
    # Get top N similar movies
    recommendations = similar_movies[0:top_n]
    
    # Return the titles of recommended movies
    return [movie_titles[i[0]] for i in recommendations]

movie_titles = movies['Title_Encoded'].values

In [93]:
def get_encoded_title_by_features(Genre, Popularity, Language, Release_Era, movies_df):
    # Filter the DataFrame based on provided feature values
    filtered_movies = movies_df[
        (movies_df['Genre_First_Word'] == Genre) &
        (movies_df['Popularity_'] == Popularity) &
        (movies_df['Original_Language_Full'] == Language) &
        (movies_df['Release_Era'] == Release_Era)
    ]
    
    
    if not filtered_movies.empty:
        return filtered_movies['Title_Encoded'].values
    else:
        return "No movie found with the specified feature values."

# Example usage
# Replace the values with actual encoded values you want to query
genre_input = 'Action'  # Example value for Genre_Encoded
popularity_input = 'Very High'  # Example value for Popularity_Encoded
language_input = 'English'  # Example value for Language_Encoded
release_era_input = 2020  # Example value for Release_Era

# Get the encoded titles based on the input values
encoded_titles = get_encoded_title_by_features(
    genre_input,
    popularity_input,
    language_input,
    release_era_input,
    movies
)

print("Encoded Titles:", encoded_titles[0])


Encoded Titles: 6612


In [94]:




some_movie_encoded = encoded_titles[0]

recommended_movies = recommend_movies(some_movie_encoded, similarity_matrix, movie_titles, top_n=5)

print("Recommended Movies:", recommended_movies)

Recommended Movies: [6612, 7795, 7264, 2742, 5742]


In [95]:
import pandas as pd

# Assuming movies is your DataFrame with all the relevant columns

def get_specific_movie_details_by_encoded_title(title_encoded, movies_df, columns):
    # Filter the DataFrame to find the row with the specified Title_Encoded
    movie_details = movies_df[movies_df['Title_Encoded'] == title_encoded]
    
    # Check if any movie details are found
    if not movie_details.empty:
        # Return the specified columns
        return movie_details[columns].iloc[0]  # Return the first matching row as a Series
    else:
        return None  # Return None if no movie is found

# Example usage
# Replace these with actual encoded titles from your dataset
encoded_titles_input = recommended_movies  # Example values for Title_Encoded

# Specify the columns you want to retrieve
columns_to_retrieve = ['Title', 'Overview','Genre', 'Popularity_', 'Vote_Average', 'Poster_Url']

# Loop through each encoded title and get the movie details
for title_encoded_input in encoded_titles_input:
    movie_info = get_specific_movie_details_by_encoded_title(title_encoded_input, movies, columns_to_retrieve)
    
    # Print the specific movie details
    if movie_info is not None:
        print(f"\nMovie Details for Title Encoded {title_encoded_input}:")
        for col in columns_to_retrieve:
            print(f"{col}: {movie_info[col]}")
    else:
        print(f"No movie found with Title Encoded {title_encoded_input}.")



Movie Details for Title Encoded 6612:
Title: Spider-Man: No Way Home
Overview: Peter Parker is unmasked and no longer able to separate his normal life from the high-stakes of being a super-hero. When he asks for help from Doctor Strange the stakes become even more dangerous, forcing him to discover what it truly means to be Spider-Man.
Genre: Action, Adventure, Science Fiction
Popularity_: Very High
Vote_Average: 8.3
Poster_Url: https://image.tmdb.org/t/p/original/1g0dhYtq4irTY1GPXvft6k4YLjm.jpg

Movie Details for Title Encoded 7795:
Title: The King's Man
Overview: As a collection of history's worst tyrants and criminal masterminds gather to plot a war to wipe out millions, one man must race against time to stop them.
Genre: Action, Adventure, Thriller, War
Popularity_: Very High
Vote_Average: 7.0
Poster_Url: https://image.tmdb.org/t/p/original/aq4Pwv5Xeuvj6HZKtxyd23e6bE9.jpg

Movie Details for Title Encoded 7264:
Title: The Commando
Overview: An elite DEA agent returns home after a f