# Recommendation System

In [1]:
from collections import Counter
from itertools import combinations
import re

import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity

import matplotlib.pyplot as plt
import seaborn as sns

import pickle as pk

from jupyterthemes import jtplot
jtplot.style(figsize=(15, 9))

## Data

In [2]:
movies = pd.read_csv('data/movies_dataset_CLEAN_noNaN.csv', index_col='id')

# Vectors
vectors_keywords = pd.read_csv('data/keywords_vectors.csv', index_col=0)
vectors_cast     = pd.read_csv('data/cast_vectors.csv', index_col=0)
vectors_crew     = pd.read_csv('data/crew_vectors.csv', index_col=0)

# Sum of Vectors
sum_vectors_keywords = pd.read_csv('data/keywords_sum_vectors.csv', index_col='id')
sum_vectors_cast     = pd.read_csv('data/cast_sum_vectors.csv', index_col='id')
sum_vectors_crew     = pd.read_csv('data/crew_sum_vectors.csv', index_col='id')

# Genres cosine similarity
genres_cs = pd.read_csv('DRAFT/data/cosine_similarity_genres.csv', index_col=0)

## Movies choice

In [3]:
like = [
    'From Dusk Till Dawn',
    'Alien',
    'The Descent',
]

In [4]:
like_df = movies[movies['title'].isin(like)]

In [5]:
def i_want_to_watch_something_like(like:list)->pd.DataFrame:
    return movies[movies['title'].isin(like)]

In [5]:
i_want_to_watch_something_like(like)

Unnamed: 0_level_0,genre_ids,overview,popularity,release_date,title,vote_average,vote_count,genres,cast,crew,keywords,label,label_encoder
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
348,"[27, 878]","During its return to the earth, commercial spa...",62.215,1979-05-25,Alien,8.1,12914,"Horror, Science Fiction","[{'cast_id': 10205, 'name': 'Sigourney Weaver'...","[{'crew_id': 578, 'name': 'Ridley Scott', 'dep...","[{'id': 803, 'name': 'android'}, {'id': 1612, ...",Science Fiction,14
755,"[27, 28, 53, 80]",Seth Gecko and his younger brother Richard are...,27.675,1996-01-19,From Dusk Till Dawn,7.0,5301,"Horror, Action, Thriller, Crime","[{'cast_id': 1461, 'name': 'George Clooney', '...","[{'crew_id': 2294, 'name': 'Robert Rodriguez',...","[{'id': 246, 'name': 'dancing'}, {'id': 380, '...",Horror,10
9392,"[12, 27]","After a tragic accident, six friends reunite f...",22.417,2005-07-08,The Descent,7.0,3394,"Adventure, Horror","[{'cast_id': 57574, 'name': 'Shauna Macdonald'...","[{'crew_id': 558, 'name': 'David Julyan', 'dep...","[{'id': 1852, 'name': 'mutant'}, {'id': 1963, ...",Horror,10


In [25]:
class IWantToWatchSomethingLike:
    def __init__(self, like:list):
        self.like = like
        self.like_df = movies[movies['title'].isin(self.like)]
        self.gather_like_info()
        self.similar_movies = self.get_similar_movies()
        self.cosine_similarity = {
            'keywords': pd.DataFrame([]),
            'cast'    : pd.DataFrame([]),
            'crew'    : pd.DataFrame([]),
        }
        self.get_cosine_similarity()
        self.cs_THRESHOLD = 0.95
    
    def get_cosine_similarity(self)->dict:
        keywords_cs = cosine_similarity(
            sum_vectors_keywords.loc[self.like_df.index],
            sum_vectors_keywords.loc[self.similar_movies.index]
        )
        cast_cs = cosine_similarity(
            sum_vectors_cast.loc[self.like_df.index],
            sum_vectors_cast.loc[self.similar_movies.index]
        )
        crew_cs = cosine_similarity(
            sum_vectors_crew.loc[self.like_df.index],
            sum_vectors_crew.loc[self.similar_movies.index]
        )
        
        self.cosine_similarity['keywords'] = pd.DataFrame(
            index=self.similar_movies.index,
            columns=self.like_df['title'],
            data=keywords_cs.transpose()
        )
        
        self.cosine_similarity['cast'] = pd.DataFrame(
            index=self.similar_movies.index,
            columns=self.like_df['title'],
            data=keywords_cs.transpose()
        )
        
        self.cosine_similarity['crew'] = pd.DataFrame(
            index=self.similar_movies.index,
            columns=self.like_df['title'],
            data=keywords_cs.transpose()
        )
    
    def gather_inds_by_cs(self):
        inds = []
        for title in like:
            inds += self.cosine_similarity['keywords'][
                self.cosine_similarity['keywords'][title]>self.cs_THRESHOLD
            ][title].index.to_list()\
            + self.cosine_similarity['cast'][
                self.cosine_similarity['cast'][title]>self.cs_THRESHOLD
            ][title].index.to_list()\
            + self.cosine_similarity['crew'][
                self.cosine_similarity['crew'][title]>self.cs_THRESHOLD
            ][title].index.to_list()
        
        return list(set(inds))
    
    def show_recommendation(self):
        inds = self.gather_inds_by_cs()
        recommendation = self.similar_movies.loc[inds][(self.similar_movies['popularity']>14)]
        
        return recommendation
    
    def get_similar_movies(self)->pd.DataFrame:
        return movies[
                         (~movies['title'].isin(like))
                         & (movies['vote_average'] >= self.min_rating)
                         & (movies['label'].isin(self.like_df['label'].to_list()))
                     ]

    def gather_like_info(self)->list:
        self.min_rating = self.like_df['vote_average'].min()-0.2
    
    def show_what_i_like(self)->pd.DataFrame:
        return self.like_df
    
    

In [26]:
what_to_watch = IWantToWatchSomethingLike(like)

what_to_watch.show_what_i_like()

Unnamed: 0_level_0,genre_ids,overview,popularity,release_date,title,vote_average,vote_count,genres,cast,crew,keywords,label,label_encoder
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
348,"[27, 878]","During its return to the earth, commercial spa...",62.215,1979-05-25,Alien,8.1,12914,"Horror, Science Fiction","[{'cast_id': 10205, 'name': 'Sigourney Weaver'...","[{'crew_id': 578, 'name': 'Ridley Scott', 'dep...","[{'id': 803, 'name': 'android'}, {'id': 1612, ...",Science Fiction,14
755,"[27, 28, 53, 80]",Seth Gecko and his younger brother Richard are...,27.675,1996-01-19,From Dusk Till Dawn,7.0,5301,"Horror, Action, Thriller, Crime","[{'cast_id': 1461, 'name': 'George Clooney', '...","[{'crew_id': 2294, 'name': 'Robert Rodriguez',...","[{'id': 246, 'name': 'dancing'}, {'id': 380, '...",Horror,10
9392,"[12, 27]","After a tragic accident, six friends reunite f...",22.417,2005-07-08,The Descent,7.0,3394,"Adventure, Horror","[{'cast_id': 57574, 'name': 'Shauna Macdonald'...","[{'crew_id': 558, 'name': 'David Julyan', 'dep...","[{'id': 1852, 'name': 'mutant'}, {'id': 1963, ...",Horror,10


In [27]:
what_to_watch.show_recommendation()

  recommendation = self.similar_movies.loc[inds][(self.similar_movies['popularity']>14)]


Unnamed: 0_level_0,genre_ids,overview,popularity,release_date,title,vote_average,vote_count,genres,cast,crew,keywords,label,label_encoder
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1547,"[27, 35, 53]",A mother and her two teenage sons move to a se...,20.128,1987-07-31,The Lost Boys,7.1,1732,"Horror, Comedy, Thriller","[{'cast_id': 12261, 'name': 'Jason Patric', 'c...","[{'crew_id': 153, 'name': 'Thomas Newman', 'de...","[{'id': 387, 'name': 'california'}, {'id': 542...",Horror,10
17431,"[878, 18]",With only three weeks left in his three year c...,19.520,2009-06-12,Moon,7.6,5130,"Science Fiction, Drama","[{'cast_id': 6807, 'name': 'Sam Rockwell', 'ch...","[{'crew_id': 5497, 'name': 'Aleksandar Jovanov...","[{'id': 305, 'name': 'moon'}, {'id': 310, 'nam...",Science Fiction,14
338967,"[35, 27]","Columbus, Tallahassee, Wichita, and Little Roc...",39.657,2019-10-09,Zombieland: Double Tap,7.0,5062,"Comedy, Horror","[{'cast_id': 57755, 'name': 'Woody Harrelson',...","[{'crew_id': 561, 'name': 'John Papsidera', 'd...","[{'id': 833, 'name': 'the white house'}, {'id'...",Horror,10
10779,"[27, 35]","Once an architect, Frank Bannister now passes ...",19.185,1996-07-18,The Frighteners,6.9,1323,"Horror, Comedy","[{'cast_id': 521, 'name': 'Michael J. Fox', 'c...","[{'crew_id': 24, 'name': 'Robert Zemeckis', 'd...","[{'id': 456, 'name': 'mother'}, {'id': 798, 'n...",Horror,10
19995,"[28, 12, 14, 878]","In the 22nd century, a paraplegic Marine is di...",209.916,2009-12-15,Avatar,7.6,29311,"Action, Adventure, Fantasy, Science Fiction","[{'cast_id': 65731, 'name': 'Sam Worthington',...","[{'crew_id': 1721, 'name': 'Stephen E. Rivkin'...","[{'id': 1463, 'name': 'culture clash'}, {'id':...",Science Fiction,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...
336843,"[878, 28, 12, 53]",Thomas leads his group of escaped Gladers on t...,87.886,2018-01-10,Maze Runner: The Death Cure,7.1,6989,"Science Fiction, Action, Adventure, Thriller","[{'cast_id': 527393, 'name': ""Dylan O'Brien"", ...","[{'crew_id': 2215, 'name': 'Denise Chamian', '...","[{'id': 818, 'name': 'based on novel or book'}...",Science Fiction,14
985,"[14, 27]",First time father Henry Spencer tries to survi...,16.080,1977-03-19,Eraserhead,7.4,2131,"Fantasy, Horror","[{'cast_id': 6718, 'name': 'Jack Nance', 'char...","[{'crew_id': 4434, 'name': 'Frederick Elmes', ...","[{'id': 1009, 'name': 'baby'}, {'id': 1852, 'n...",Horror,10
696806,"[12, 878]","After accidentally crash-landing in 2022, time...",54.317,2022-03-11,The Adam Project,7.1,3652,"Adventure, Science Fiction","[{'cast_id': 10859, 'name': 'Ryan Reynolds', '...","[{'crew_id': 10956, 'name': 'Josh McLaglen', '...","[{'id': 4379, 'name': 'time travel'}, {'id': 1...",Science Fiction,14
713704,"[53, 27]",A reunion between two estranged sisters gets c...,754.930,2023-04-12,Evil Dead Rise,7.0,1765,"Thriller, Horror","[{'cast_id': 1186659, 'name': 'Lily Sullivan',...","[{'crew_id': 7623, 'name': 'Sam Raimi', 'depar...","[{'id': 380, 'name': 'sibling relationship'}, ...",Horror,10
