In [3]:
#import necessary libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [5]:
# Read the entertainment catalogue into a DataFrame and preview its first rows
data = pd.read_csv("Entertainment.csv")
data.head()

Unnamed: 0,Id,Titles,Category,Reviews
0,6973,Toy Story (1995),"Drama, Romance, School, Supernatural",-8.98
1,6778,Jumanji (1995),"Action, Adventure, Drama, Fantasy, Magic, Mili...",8.88
2,9702,Grumpier Old Men (1995),"Action, Comedy, Historical, Parody, Samurai, S...",99.0
3,6769,Waiting to Exhale (1995),"Sci-Fi, Thriller",99.0
4,1123,Father of the Bride Part II (1995),"Action, Comedy, Historical, Parody, Samurai, S...",-0.44


In [9]:
# Step 1: vectorise the "Category" text of every row with TF-IDF
tfidf = TfidfVectorizer(stop_words="english")  # built-in English stop-word list is dropped
# Learn the vocabulary and build the document-term matrix in a single pass
tfidf_matrix = tfidf.fit_transform(data["Category"])

In [11]:
# Step 2: pairwise cosine similarity between every pair of rows.
# With a single argument the matrix is compared against itself.
cosine_sim = cosine_similarity(tfidf_matrix)

In [27]:
#Step3:Create a function to recommend titles based on similarity
def get_recommendation(title, cosine_sim=cosine_sim, top_n=5):
    """Return the titles most similar to ``title`` according to ``cosine_sim``.

    Parameters
    ----------
    title : str
        Exact value to look up in ``data['Titles']``. If several rows carry
        the same title, the first matching row is used.
    cosine_sim : 2-D array-like, optional
        Pairwise similarity matrix; row ``i`` is read as the similarity of
        row ``i`` of ``data`` to every row. Defaults to the module-level
        ``cosine_sim``.
    top_n : int, optional
        Maximum number of recommendations to return (default 5).

    Returns
    -------
    pandas.Series
        Up to ``top_n`` entries of ``data['Titles']``, most similar first.
        The queried row itself is never included.

    Raises
    ------
    IndexError
        If ``title`` does not occur in ``data['Titles']``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # data['Titles'] == title is a boolean mask with one True/False per row,
    # e.g. for "Toy Story (1995)": [True, False, False, ...].
    # nonzero() turns that mask into row *positions*, which is what both
    # cosine_sim[...] and .iloc[...] below expect, whatever the index labels are.
    matches = (data['Titles'] == title).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise IndexError(f"Title {title!r} not found in data['Titles']")
    # Titles are expected to be unique, but the mask can still match several
    # rows; take the first match.
    idx = int(matches[0])

    # Pair every row position with its similarity to the queried row and
    # order the pairs from most to least similar (sorted() is stable, so ties
    # keep their original row order).
    sim_scores = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)

    # Drop the queried row explicitly rather than assuming it sorts first:
    # another row with an identical category vector also scores 1.0 and can
    # precede it, in which case slicing [1:] would return the query itself.
    sim_indices = [pos for pos, _ in sim_scores if pos != idx][:top_n]

    # Return the matching titles, most similar first
    return data['Titles'].iloc[sim_indices]



In [29]:
# Try the recommender on a sample title
example_title = "Toy Story (1995)"
recommend_titles = get_recommendation(example_title)

In [31]:
# Show a header line followed by one recommended title per line
print(f"Recommendation for '{example_title}':")
for title in recommend_titles:
    print(title)

Recommendation for 'Toy Story (1995)':
Othello (1995)
Sense and Sensibility (1995)
Dracula: Dead and Loving It (1995)
American President, The (1995)
When Night Is Falling (1995)
