# Recommendation System

In [1]:
from collections import Counter
from itertools import combinations
import re

import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity

import matplotlib.pyplot as plt
import seaborn as sns

import pickle as pk

from jupyterthemes import jtplot
jtplot.style(figsize=(15, 9))

## Data

In [2]:
movies = pd.read_csv('data/movies_dataset_CLEAN_noNaN.csv', index_col='id')

# Vectors
vectors_keywords = pd.read_csv('data/keywords_vectors.csv', index_col=0)
vectors_cast     = pd.read_csv('data/cast_vectors.csv', index_col=0)
vectors_crew     = pd.read_csv('data/crew_vectors.csv', index_col=0)

# Sum of Vectors
sum_vectors_keywords = pd.read_csv('data/keywords_sum_vectors.csv', index_col='id')
sum_vectors_cast     = pd.read_csv('data/cast_sum_vectors.csv', index_col='id')
sum_vectors_crew     = pd.read_csv('data/crew_sum_vectors.csv', index_col='id')

# Genres cosine similarity
genres_cs = pd.read_csv('DRAFT/data/cosine_similarity_genres.csv', index_col=0)

## Movies choice

In [3]:
like = [
    'From Dusk Till Dawn',
    'Alien',
    'The Descent',
]

In [4]:
like_df = movies[movies['title'].isin(like)]

In [5]:
def i_want_to_watch_something_like(like:list)->pd.DataFrame:
    return movies[movies['title'].isin(like)]

In [5]:
i_want_to_watch_something_like(like)

Unnamed: 0_level_0,genre_ids,overview,popularity,release_date,title,vote_average,vote_count,genres,cast,crew,keywords,label,label_encoder
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
348,"[27, 878]","During its return to the earth, commercial spa...",62.215,1979-05-25,Alien,8.1,12914,"Horror, Science Fiction","[{'cast_id': 10205, 'name': 'Sigourney Weaver'...","[{'crew_id': 578, 'name': 'Ridley Scott', 'dep...","[{'id': 803, 'name': 'android'}, {'id': 1612, ...",Science Fiction,14
755,"[27, 28, 53, 80]",Seth Gecko and his younger brother Richard are...,27.675,1996-01-19,From Dusk Till Dawn,7.0,5301,"Horror, Action, Thriller, Crime","[{'cast_id': 1461, 'name': 'George Clooney', '...","[{'crew_id': 2294, 'name': 'Robert Rodriguez',...","[{'id': 246, 'name': 'dancing'}, {'id': 380, '...",Horror,10
9392,"[12, 27]","After a tragic accident, six friends reunite f...",22.417,2005-07-08,The Descent,7.0,3394,"Adventure, Horror","[{'cast_id': 57574, 'name': 'Shauna Macdonald'...","[{'crew_id': 558, 'name': 'David Julyan', 'dep...","[{'id': 1852, 'name': 'mutant'}, {'id': 1963, ...",Horror,10


In [37]:
class IWantToWatchSomethingLike:
    def __init__(self, like:list):
        self.cs_THRESHOLD = 0.95
        self.MIN_VOTE_COUNT = 1000
        self.MIN_POPULARITY = 14
        
        self.like = like
        self.like_df = movies[movies['title'].isin(self.like)]
        
        self.gather_like_info()
        self.similar_movies = self.get_similar_movies()
        self.cosine_similarity = {
            'keywords': pd.DataFrame([]),
            'cast'    : pd.DataFrame([]),
            'crew'    : pd.DataFrame([]),
        }
        self.get_cosine_similarity()
        
    def get_cosine_similarity(self)->dict:
        keywords_cs = cosine_similarity(
            sum_vectors_keywords.loc[self.like_df.index],
            sum_vectors_keywords.loc[self.similar_movies.index]
        )
        cast_cs = cosine_similarity(
            sum_vectors_cast.loc[self.like_df.index],
            sum_vectors_cast.loc[self.similar_movies.index]
        )
        crew_cs = cosine_similarity(
            sum_vectors_crew.loc[self.like_df.index],
            sum_vectors_crew.loc[self.similar_movies.index]
        )
        
        self.cosine_similarity['keywords'] = pd.DataFrame(
            index=self.similar_movies.index,
            columns=self.like_df['title'],
            data=keywords_cs.transpose()
        )
        
        self.cosine_similarity['cast'] = pd.DataFrame(
            index=self.similar_movies.index,
            columns=self.like_df['title'],
            data=keywords_cs.transpose()
        )
        
        self.cosine_similarity['crew'] = pd.DataFrame(
            index=self.similar_movies.index,
            columns=self.like_df['title'],
            data=keywords_cs.transpose()
        )
    
    def gather_inds_by_cs(self):
        inds = []
        for title in like:
            inds += self.cosine_similarity['keywords'][
                self.cosine_similarity['keywords'][title]>self.cs_THRESHOLD
            ][title].index.to_list()\
            + self.cosine_similarity['cast'][
                self.cosine_similarity['cast'][title]>self.cs_THRESHOLD
            ][title].index.to_list()\
            + self.cosine_similarity['crew'][
                self.cosine_similarity['crew'][title]>self.cs_THRESHOLD
            ][title].index.to_list()
        
        return list(set(inds))
    
    def show_recommendation(self, amount=10, sorted_by='vote_average'):
        inds = self.gather_inds_by_cs()
        recommendation = self.similar_movies.loc[inds]
        
        if sorted_by in recommendation.columns and amount < recommendation.shape[0]:
            return recommendation.sort_values([sorted_by], ascending=False)[:amount]
        
        return recommendation
        
    def get_similar_movies(self)->pd.DataFrame:
        return movies[
                         (~movies['title'].isin(like))
                         & (movies['vote_average'] >= self.min_rating)
                         & (movies['vote_count'] >= self.MIN_VOTE_COUNT)
                         & (movies['popularity'] > self.MIN_POPULARITY)
                         & (movies['label'].isin(self.like_df['label'].to_list()))
                     ]

    def gather_like_info(self)->list:
        self.min_rating = self.like_df['vote_average'].min()-0.2
    
    def show_what_i_like(self)->pd.DataFrame:
        return self.like_df
    
    

In [38]:
what_to_watch = IWantToWatchSomethingLike(like)

what_to_watch.show_what_i_like()

Unnamed: 0_level_0,genre_ids,overview,popularity,release_date,title,vote_average,vote_count,genres,cast,crew,keywords,label,label_encoder
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
348,"[27, 878]","During its return to the earth, commercial spa...",62.215,1979-05-25,Alien,8.1,12914,"Horror, Science Fiction","[{'cast_id': 10205, 'name': 'Sigourney Weaver'...","[{'crew_id': 578, 'name': 'Ridley Scott', 'dep...","[{'id': 803, 'name': 'android'}, {'id': 1612, ...",Science Fiction,14
755,"[27, 28, 53, 80]",Seth Gecko and his younger brother Richard are...,27.675,1996-01-19,From Dusk Till Dawn,7.0,5301,"Horror, Action, Thriller, Crime","[{'cast_id': 1461, 'name': 'George Clooney', '...","[{'crew_id': 2294, 'name': 'Robert Rodriguez',...","[{'id': 246, 'name': 'dancing'}, {'id': 380, '...",Horror,10
9392,"[12, 27]","After a tragic accident, six friends reunite f...",22.417,2005-07-08,The Descent,7.0,3394,"Adventure, Horror","[{'cast_id': 57574, 'name': 'Shauna Macdonald'...","[{'crew_id': 558, 'name': 'David Julyan', 'dep...","[{'id': 1852, 'name': 'mutant'}, {'id': 1963, ...",Horror,10


In [39]:
what_to_watch.show_recommendation(sorted_by='release_date')

Unnamed: 0_level_0,genre_ids,overview,popularity,release_date,title,vote_average,vote_count,genres,cast,crew,keywords,label,label_encoder
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
713704,"[53, 27]",A reunion between two estranged sisters gets c...,754.93,2023-04-12,Evil Dead Rise,7.0,1765,"Thriller, Horror","[{'cast_id': 1186659, 'name': 'Lily Sullivan',...","[{'crew_id': 7623, 'name': 'Sam Raimi', 'depar...","[{'id': 380, 'name': 'sibling relationship'}, ...",Horror,10
934433,"[27, 9648, 53]","Following the latest Ghostface killings, the f...",315.504,2023-03-08,Scream VI,7.2,1418,"Horror, Mystery, Thriller","[{'cast_id': 1373659, 'name': 'Melissa Barrera...","[{'crew_id': 409, 'name': 'Cathy Konrad', 'dep...","[{'id': 242, 'name': 'new york city'}, {'id': ...",Horror,10
76600,"[878, 12, 28]",Set more than a decade after the events of the...,899.251,2022-12-14,Avatar: The Way of Water,7.7,8861,"Science Fiction, Adventure, Action","[{'cast_id': 65731, 'name': 'Sam Worthington',...","[{'crew_id': 1721, 'name': 'Stephen E. Rivkin'...","[{'id': 697, 'name': 'loss of loved one'}, {'i...",Science Fiction,14
663712,"[27, 53]","After being resurrected by a sinister entity, ...",197.173,2022-10-06,Terrifier 2,6.9,1268,"Horror, Thriller","[{'cast_id': 1882502, 'name': 'Lauren LaVera',...","[{'crew_id': 83331, 'name': 'Steven Barton', '...","[{'id': 3199, 'name': 'clown'}, {'id': 3335, '...",Horror,10
913290,"[27, 9648, 53]","In town for a job interview, a young woman arr...",40.927,2022-09-08,Barbarian,6.9,1530,"Horror, Mystery, Thriller","[{'cast_id': 1335315, 'name': 'Georgina Campbe...","[{'crew_id': 376, 'name': 'Arnon Milchan', 'de...","[{'id': 739, 'name': 'sexual abuse'}, {'id': 1...",Horror,10
756999,"[27, 53]","Finney Blake, a shy but clever 13-year-old boy...",77.743,2022-06-22,The Black Phone,7.7,3946,"Horror, Thriller","[{'cast_id': 2803710, 'name': 'Mason Thames', ...","[{'crew_id': 494, 'name': 'Terri Taylor', 'dep...","[{'id': 380, 'name': 'sibling relationship'}, ...",Horror,10
696806,"[12, 878]","After accidentally crash-landing in 2022, time...",54.317,2022-03-11,The Adam Project,7.1,3652,"Adventure, Science Fiction","[{'cast_id': 10859, 'name': 'Ryan Reynolds', '...","[{'crew_id': 10956, 'name': 'Josh McLaglen', '...","[{'id': 4379, 'name': 'time travel'}, {'id': 1...",Science Fiction,14
522402,"[878, 18, 12]","On a post-apocalyptic Earth, a robot, built to...",57.57,2021-11-04,Finch,7.9,2945,"Science Fiction, Drama, Adventure","[{'cast_id': 31, 'name': 'Tom Hanks', 'charact...","[{'crew_id': 24, 'name': 'Robert Zemeckis', 'd...","[{'id': 14544, 'name': 'robot'}, {'id': 189092...",Science Fiction,14
576845,"[27, 9648, 53]","A young girl, passionate about fashion design,...",33.303,2021-10-21,Last Night in Soho,7.4,2608,"Horror, Mystery, Thriller","[{'cast_id': 1356758, 'name': 'Thomasin McKenz...","[{'crew_id': 2236, 'name': 'Tim Bevan', 'depar...","[{'id': 212, 'name': 'london, england'}, {'id'...",Horror,10
588228,"[28, 878, 12]",The world is stunned when a group of time trav...,75.953,2021-09-03,The Tomorrow War,7.6,2612,"Action, Science Fiction, Adventure","[{'cast_id': 73457, 'name': 'Chris Pratt', 'ch...","[{'crew_id': 3893, 'name': 'David S. Goyer', '...","[{'id': 1477, 'name': 'world cup'}, {'id': 437...",Science Fiction,14


In [35]:
what_to_watch.show_recommendation(sorted_by='vote_average', amount=5)

Unnamed: 0_level_0,genre_ids,overview,popularity,release_date,title,vote_average,vote_count,genres,cast,crew,keywords,label,label_encoder
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
157336,"[12, 18, 878]",The adventures of a group of explorers who mak...,130.98,2014-11-05,Interstellar,8.4,31699,"Adventure, Drama, Science Fiction","[{'cast_id': 10297, 'name': 'Matthew McConaugh...","[{'crew_id': 947, 'name': 'Hans Zimmer', 'depa...","[{'id': 310, 'name': 'artificial intelligence'...",Science Fiction,14
1891,"[12, 28, 878]","The epic saga continues as Luke Skywalker, in ...",31.938,1980-05-20,The Empire Strikes Back,8.4,15460,"Adventure, Action, Science Fiction","[{'cast_id': 2, 'name': 'Mark Hamill', 'charac...","[{'crew_id': 1, 'name': 'George Lucas', 'depar...","[{'id': 526, 'name': 'rebel'}, {'id': 803, 'na...",Science Fiction,14
280,"[28, 53, 878]",Nearly 10 years have passed since Sarah Connor...,69.226,1991-07-03,Terminator 2: Judgment Day,8.1,11448,"Action, Thriller, Science Fiction","[{'cast_id': 1100, 'name': 'Arnold Schwarzeneg...","[{'crew_id': 563, 'name': 'Dody Dorn', 'depart...","[{'id': 679, 'name': 'cyborg'}, {'id': 1366, '...",Science Fiction,14
679,"[28, 53, 878]",When Ripley's lifepod is found by a salvage cr...,59.005,1986-07-18,Aliens,7.9,8542,"Action, Thriller, Science Fiction","[{'cast_id': 10205, 'name': 'Sigourney Weaver'...","[{'crew_id': 8380, 'name': 'Michael Lamont', '...","[{'id': 803, 'name': 'android'}, {'id': 1603, ...",Science Fiction,14
522402,"[878, 18, 12]","On a post-apocalyptic Earth, a robot, built to...",57.57,2021-11-04,Finch,7.9,2945,"Science Fiction, Drama, Adventure","[{'cast_id': 31, 'name': 'Tom Hanks', 'charact...","[{'crew_id': 24, 'name': 'Robert Zemeckis', 'd...","[{'id': 14544, 'name': 'robot'}, {'id': 189092...",Science Fiction,14


In [36]:
what_to_watch.show_recommendation(amount=10000)

Unnamed: 0_level_0,genre_ids,overview,popularity,release_date,title,vote_average,vote_count,genres,cast,crew,keywords,label,label_encoder
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1547,"[27, 35, 53]",A mother and her two teenage sons move to a se...,20.128,1987-07-31,The Lost Boys,7.1,1732,"Horror, Comedy, Thriller","[{'cast_id': 12261, 'name': 'Jason Patric', 'c...","[{'crew_id': 153, 'name': 'Thomas Newman', 'de...","[{'id': 387, 'name': 'california'}, {'id': 542...",Horror,10
17431,"[878, 18]",With only three weeks left in his three year c...,19.520,2009-06-12,Moon,7.6,5130,"Science Fiction, Drama","[{'cast_id': 6807, 'name': 'Sam Rockwell', 'ch...","[{'crew_id': 5497, 'name': 'Aleksandar Jovanov...","[{'id': 305, 'name': 'moon'}, {'id': 310, 'nam...",Science Fiction,14
338967,"[35, 27]","Columbus, Tallahassee, Wichita, and Little Roc...",39.657,2019-10-09,Zombieland: Double Tap,7.0,5062,"Comedy, Horror","[{'cast_id': 57755, 'name': 'Woody Harrelson',...","[{'crew_id': 561, 'name': 'John Papsidera', 'd...","[{'id': 833, 'name': 'the white house'}, {'id'...",Horror,10
10779,"[27, 35]","Once an architect, Frank Bannister now passes ...",19.185,1996-07-18,The Frighteners,6.9,1323,"Horror, Comedy","[{'cast_id': 521, 'name': 'Michael J. Fox', 'c...","[{'crew_id': 24, 'name': 'Robert Zemeckis', 'd...","[{'id': 456, 'name': 'mother'}, {'id': 798, 'n...",Horror,10
19995,"[28, 12, 14, 878]","In the 22nd century, a paraplegic Marine is di...",209.916,2009-12-15,Avatar,7.6,29311,"Action, Adventure, Fantasy, Science Fiction","[{'cast_id': 65731, 'name': 'Sam Worthington',...","[{'crew_id': 1721, 'name': 'Stephen E. Rivkin'...","[{'id': 1463, 'name': 'culture clash'}, {'id':...",Science Fiction,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...
19908,"[35, 27]",Columbus has made a habit of running from what...,39.697,2009-10-07,Zombieland,7.3,11268,"Comedy, Horror","[{'cast_id': 57755, 'name': 'Woody Harrelson',...","[{'crew_id': 561, 'name': 'John Papsidera', 'd...","[{'id': 291, 'name': 'circus'}, {'id': 380, 'n...",Horror,10
336843,"[878, 28, 12, 53]",Thomas leads his group of escaped Gladers on t...,87.886,2018-01-10,Maze Runner: The Death Cure,7.1,6989,"Science Fiction, Action, Adventure, Thriller","[{'cast_id': 527393, 'name': ""Dylan O'Brien"", ...","[{'crew_id': 2215, 'name': 'Denise Chamian', '...","[{'id': 818, 'name': 'based on novel or book'}...",Science Fiction,14
985,"[14, 27]",First time father Henry Spencer tries to survi...,16.080,1977-03-19,Eraserhead,7.4,2131,"Fantasy, Horror","[{'cast_id': 6718, 'name': 'Jack Nance', 'char...","[{'crew_id': 4434, 'name': 'Frederick Elmes', ...","[{'id': 1009, 'name': 'baby'}, {'id': 1852, 'n...",Horror,10
696806,"[12, 878]","After accidentally crash-landing in 2022, time...",54.317,2022-03-11,The Adam Project,7.1,3652,"Adventure, Science Fiction","[{'cast_id': 10859, 'name': 'Ryan Reynolds', '...","[{'crew_id': 10956, 'name': 'Josh McLaglen', '...","[{'id': 4379, 'name': 'time travel'}, {'id': 1...",Science Fiction,14
