In [3]:
import os
import sys
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# import custom libraries
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
sys.path.append(parent_dir)
from libs import data_utils

## Cosine Similarity

In [4]:
# Read last processed movies_cleaned_data
movies_cleaned_data = data_utils.read_last_processed_data('movies_cleaned_data')

# Read last processed combined_features
combined_features = data_utils.read_last_processed_data('combined_features_pca100', file_type = 'pkl')
doc_sim = cosine_similarity(combined_features)
doc_sim_df = pd.DataFrame(doc_sim)

Reading c:\Users\MULTIVISION\projects\movie-recommendation-system/data/processed/combined_features_pca100_2024-07-12.pkl...


In [20]:
def get_recommendations(movies_cleaned_data, doc_sim_df, movie_title=None, num_movies=5):

    # Get movie
    if movie_title == None:    
        movie_title = input('Tell me a you liked.')
        
    movie_title = movie_title.title()

    movies_titles = list(movies_cleaned_data['title'])

    if movie_title in movies_titles:
        # find movie id
        movie_idx = movies_titles.index(movie_title)

        # get movie similarities
        movie_similarities = doc_sim_df.iloc[movie_idx].values

        # get top 5 similar movie IDs
        similar_movie_idxs = np.argsort(-movie_similarities)[1:num_movies+1]

        # get top num_movies movies
        similar_movies = [movies_titles[i] for i in similar_movie_idxs]

        print(similar_movies)
    else:
        print("Sorry.. Don't recognized that movie.")

In [21]:
get_recommendations(movies_cleaned_data, doc_sim_df, movie_title='Toy Story')

['Toy Story 3', 'Toy Story 2', 'Stuart Little 2', 'Ted', 'Pinocchio']


### Call get_recommendations for popular movies

In [16]:
movies_cleaned_data.sort_values('popularity', ascending=False).head(10)

Unnamed: 0.1,Unnamed: 0,id,title,overview,genres,popularity,vote_average,release_year,keywords,title_wrangled
30389,30389,211672,Minions,"['minion', 'recruit', 'overkil', 'super', 'vil...","['famili', 'anim', 'adventur', 'comedi']",547.488298,6.4,2015.0,"['assist', 'aftercreditssting', 'duringcredits...",['minion']
32932,32932,297762,Wonder Woman,"['amazon', 'princess', 'come', 'world', 'man',...","['action', 'adventur', 'fantasi']",294.337037,7.2,2017.0,"['dc comic', 'hero', 'greek mytholog', 'island...","['wonder', 'woman']"
42386,42386,321612,Beauty and the Beast,"['live', 'action', 'adapt', 'disney', 'version...","['famili', 'fantasi', 'romanc']",287.253654,6.8,2017.0,"['magic', 'castl', 'fairy tal', 'music', 'curs...","['beauti', 'beast']"
43745,43745,339403,Baby Driver,"['coerc', 'work', 'crime', 'boss', 'young', 'g...","['action', 'crime']",228.032744,7.2,2017.0,"['robberi', 'music', 'crime boss', 'romanc', '...","['babi', 'driver']"
24294,24294,177572,Big Hero 6,"['special', 'bond', 'develop', 'plu', 'size', ...","['adventur', 'famili', 'anim', 'action', 'come...",213.849907,7.8,2014.0,"['brother brother relationship', 'hero', 'tale...","['big', 'hero']"
26355,26355,293660,Deadpool,"['deadpool', 'tell', 'origin', 'stori', 'forme...","['action', 'adventur', 'comedi']",187.860492,7.4,2016.0,"['anti hero', 'mercenari', 'marvel com', 'supe...",['deadpool']
26357,26357,283995,Guardians of the Galaxy Vol. 2,"['guardian', 'must', 'fight', 'keep', 'newfoun...","['action', 'adventur', 'comedi', 'science fict']",185.330992,7.6,2017.0,"['sequel', 'superhero', 'based on com', 'misfi...","['guardian', 'galaxi', 'vol']"
14527,14527,19995,Avatar,"['nd', 'centuri', 'parapleg', 'marin', 'dispat...","['action', 'adventur', 'fantasi', 'science fict']",185.070892,7.2,2009.0,"['culture clash', 'futur', 'space war', 'space...",['avatar']
24190,24190,245891,John Wick,"['ex', 'lunat', 'wick', 'come', 'med', 'track'...","['action', 'thriller']",183.870374,7.0,2014.0,"['hitman', 'russian mafia', 'reveng', 'murder'...",['wick']
23526,23526,210577,Gone Girl,"['wife', 'disappear', 'becom', 'focu', 'intens...","['mysteri', 'thriller', 'drama']",154.801009,7.9,2014.0,"['based on novel', 'marriage crisi', 'disappea...","['gone', 'girl']"


In [25]:
popular_movies = movies_cleaned_data.sort_values('popularity', ascending=False).head(10)
for movie in popular_movies['title']:
  print('\nMovie:', movie)
  print('Top 5 recommended Movies:')
  get_recommendations(movies_cleaned_data, doc_sim_df, movie_title=movie)


Movie: Minions
Top 5 recommended Movies:
['Cats & Dogs 2 : The Revenge of Kitty Galore', 'Despicable Me 3', 'Winnie the Pooh', 'Penguins of Madagascar', 'Tom and Jerry Spy Quest']

Movie: Wonder Woman
Top 5 recommended Movies:
['Green Lantern: Emerald Knights', 'Justice League', 'Justice League: The New Frontier', 'The Flash 2 - Revenge of the Trickster', 'The Flash 2 - Revenge of the Trickster']

Movie: Beauty and the Beast
Top 5 recommended Movies:
Sorry.. Don't recognized that movie.

Movie: Baby Driver
Top 5 recommended Movies:
['Transit', 'The Driver', "Watch Out, We're Mad", 'Executive Target', 'White Line Fever']

Movie: Big Hero 6
Top 5 recommended Movies:
['Brother Bear', "VeggieTales: The Pirates Who Don't Do Anything", 'Honey, We Shrunk Ourselves', 'Grimsby', 'Grimsby']

Movie: Deadpool
Top 5 recommended Movies:
['Thor: The Dark World', 'Ant-Man', 'Iron Man 2', 'The Avengers', 'Marvel One-Shot: Item 47']

Movie: Guardians of the Galaxy Vol. 2
Top 5 recommended Movies:
Sorry