# movie recommendation system using cosine similarity

In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from fuzzywuzzy import process
from IPython.display import display



In [2]:
df = pd.read_csv('assets/cosine_similarity.csv')
df = df.dropna()
display(df.sample(5))

Unnamed: 0,movieId,title,genres
44834,169082,The Clearstream Affair (2015),Thriller
3125,3218,Poison (1991),Drama
52371,185137,Nadar solo (2003),Drama
60778,204132,Secret Obsession (2019),Drama|Thriller
53541,187629,The Legend of Johnny Lingo (2003),Action|Adventure|Children


In [3]:
# define tf-idf vectorizer to transform the genre text into vectors

tfidf = TfidfVectorizer(stop_words='english')

# fit and transform the genres coloumn into a matrix of tf-idf fearures

tfidf_matrix = tfidf.fit_transform(df['genres'])

# compute the cosine similarity matrix

cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)


In [4]:
def get_recommendations(title, cosine_sim=cosine_sim):
    # Find the closest match to the title
    closest_match = process.extractOne(title, df['title'])[0]
    
    # Get the index of the movie that matches the closest title
    idx = df[df['title'] == closest_match].index[0]

    # Get the pairwise similarity scores of all movies with that movie
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the movies based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 10 most similar movies
    sim_scores = sim_scores[1:11]

    # Get the movie indices
    movie_indices = [i[0] for i in sim_scores]

    return df['title'].iloc[movie_indices]

In [8]:
# testing

movie_title = 'Poison'
results = get_recommendations(movie_title)

if isinstance(results, str):
    print(results)
else:
    print(f"Recommendations similar to {movie_title}:")
    for i, movie in enumerate(results, 1):
        print(f"{i}. {movie}")

Recommendations similar to Poison:
1. Shanghai Triad (Yao a yao yao dao waipo qiao) (1995)
2. Dead Man Walking (1995)
3. Hate (Haine, La) (1995)
4. Young Poisoner's Handbook, The (1995)
5. The Glass Shield (1994)
6. Heavenly Creatures (1994)
7. Little Odessa (1994)
8. New Jersey Drive (1995)
9. Once Were Warriors (1994)
10. Shawshank Redemption, The (1994)
