In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [2]:
# Number of catalogue rows used in this notebook; the pairwise similarity
# matrix built later grows quadratically with this value.
MAX_TITLES = 6000

# nrows stops parsing after MAX_TITLES rows and returns an independent frame,
# so later column assignments do not operate on a slice of a larger frame.
df = pd.read_csv('netflix_titles.csv', low_memory=True, nrows=MAX_TITLES)
df

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...
...,...,...,...,...,...,...,...,...,...,...,...,...
5995,s5996,Movie,1st Summoning,Raymond Wood,"Hayley Lovitt, Teddy Cole, Brook Todd, Ace Har...",United States,"July 26, 2019",2018,TV-MA,94 min,"Horror Movies, Independent Movies",Student filmmakers uncover occult rituals tied...
5996,s5997,Movie,20 Feet From Stardom,Morgan Neville,"Darlene Love, Merry Clayton, Lisa Fischer, Tát...",United States,"September 22, 2018",2013,PG-13,91 min,"Documentaries, Music & Musicals",Winner of the 2014 Academy Award for Best Docu...
5997,s5998,Movie,2015 Dream Concert,,"4Minute, B1A4, BtoB, ELSIE, EXID, EXO, Got7, I...",South Korea,"April 28, 2017",2015,TV-PG,107 min,"International Movies, Music & Musicals",The world's biggest K-pop festival marked its ...
5998,s5999,Movie,2036 Origin Unknown,Hasraf Dulull,"Katee Sackhoff, Ray Fearon, Julie Cox, Steven ...",United Kingdom,"December 20, 2018",2018,TV-14,95 min,Sci-Fi & Fantasy,Working with an artificial intelligence to inv...


In [3]:
# Missing descriptions become '' so every row handed to the vectorizer is a string
df['description'] = df['description'].fillna(value='')


In [4]:
# Vectorizer that ignores common English stop words
tfidf = TfidfVectorizer(stop_words='english')
# Learn the vocabulary from the descriptions and encode them in one step
# (one row per title, one column per vocabulary term)
tfidf_matrix = tfidf.fit_transform(raw_documents=df['description'])
# Show (number of titles, vocabulary size)
tfidf_matrix.shape

(6000, 15438)

In [5]:
# Pairwise similarity of every description against every other one.
# With Y omitted, linear_kernel compares the matrix with itself.
cosine_sim = linear_kernel(tfidf_matrix)
# Similarity scores of the first title against the whole catalogue
cosine_sim[0]

array([1.        , 0.        , 0.        , ..., 0.        , 0.        ,
       0.03045651])

In [6]:
# Reverse lookup: title -> row label of df.
indices = pd.Series(df.index, index=df['title'])
# Keep only the first occurrence of each title. The duplicates live in the
# index (titles), not in the values (row labels), so Series.drop_duplicates()
# would not remove them and a repeated title would look up to a Series
# instead of a single row label.
indices = indices[~indices.index.duplicated(keep='first')]
indices

title
Dick Johnson Is Dead        0
Blood & Water               1
Ganglands                   2
Jailbirds New Orleans       3
Kota Factory                4
                         ... 
1st Summoning            5995
20 Feet From Stardom     5996
2015 Dream Concert       5997
2036 Origin Unknown      5998
20th Century Women       5999
Length: 6000, dtype: int64

In [7]:
import numpy as np

# Optional export of the loaded frame, left disabled.
# df.to_csv("few_movies.csv")
# Write the lookup Series to disk. index=False leaves the index (the titles)
# out of the file, so only the integer row labels are saved.
# NOTE(review): confirm the consumer of indices_df.csv does not need the titles.
indices.to_csv("indices_df.csv",index=False)
# Optional export of the similarity matrix, left disabled.
# np.save('ndarray_data.npy', cosine_sim)
# Displayed as a quick check that `indices` is a pandas Series.
type(indices)

pandas.core.series.Series

In [8]:
def get_recommendations(title, cosine_sim=cosine_sim, num_recommend = 10):
    """Return the titles whose descriptions are most similar to ``title``.

    Parameters
    ----------
    title : str
        Title to look up in the module-level ``indices`` Series.
    cosine_sim : 2-D array, optional
        Pairwise similarity matrix; row ``i`` holds the scores of the i-th
        row of ``df`` against every row. Defaults to the matrix computed
        earlier in the notebook.
    num_recommend : int, optional
        Maximum number of titles to return. Values <= 0 give an empty result.

    Returns
    -------
    pandas.Series
        Titles taken from ``df['title']``, ordered from most to least similar.
        The queried title itself is never included.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    try:
        idx = indices[title]
    except KeyError:
        raise KeyError(f"Title not found: {title!r}") from None

    # A title that occurs more than once looks up to a Series of row
    # positions; use the first occurrence so `idx` is always a scalar.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Similarity of the requested title against every title
    scores = cosine_sim[idx]

    # Exclude the title itself explicitly instead of assuming it sorts first:
    # ties (identical or empty descriptions) can put another row ahead of it.
    candidates = [i for i in range(len(scores)) if i != idx]
    # Stable sort, highest similarity first
    candidates.sort(key=lambda i: scores[i], reverse=True)

    # Clamp so a negative count cannot slice from the wrong end
    movie_indices = candidates[:max(num_recommend, 0)]
    return df['title'].iloc[movie_indices]

In [11]:
print("This is Movie Recommendation System")
print("Give the movie name and also the no of movies that you want to get recommended")

user_input_movie = input("Enter Name")
raw_rec_count = input("Enter No of movies ")

# Parse the count defensively: int() raises ValueError on empty or
# non-numeric input, which would otherwise abort the cell before the
# validation below has a chance to run.
try:
    user_input_rec_count = int(raw_rec_count)
except ValueError:
    user_input_rec_count = None

# Both inputs must be usable: a non-blank title AND a positive count.
is_valid_input = (
    user_input_movie.strip() != ""
    and user_input_rec_count is not None
    and user_input_rec_count > 0
)

if is_valid_input:
    try:
        movies = get_recommendations(user_input_movie, num_recommend = user_input_rec_count)
    except KeyError:
        # The title lookup raises KeyError for names that are not in the catalogue
        print(f"No title named {user_input_movie!r} was found.")
    else:
        print(movies)
else:
    print("Please donot make any mistakes.")


This is Movie Recommendation System
Give the movie name and also the no of movies that you want to get recommended
Kota Factory 3 ***************************
2091             The Bridge Curse
4076    She's Dating the Gangster
266          The Creative Indians
Name: title, dtype: object
