# Recommendation System

In [1]:
from collections import Counter
from itertools import combinations
import re

import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity

import matplotlib.pyplot as plt
import seaborn as sns

import pickle as pk

from jupyterthemes import jtplot
jtplot.style(figsize=(15, 9))

## Data

In [10]:
# Movie table, indexed by its `id` column (file name suggests a cleaned, NaN-free export — TODO confirm).
movies = pd.read_csv('data/movies_dataset_CLEAN_noNaN.csv', index_col='id')

# Vectors for keywords / cast / crew; the first CSV column becomes the index.
vectors_keywords = pd.read_csv('data/keywords_vectors.csv', index_col=0)
vectors_cast     = pd.read_csv('data/cast_vectors.csv', index_col=0)
vectors_crew     = pd.read_csv('data/crew_vectors.csv', index_col=0)

# Sum of Vectors, indexed by `id`; IWantToWatchSomethingLike selects rows from
# these frames with `.loc` using the index of `movies`.
sum_vectors_keywords = pd.read_csv('data/keywords_sum_vectors.csv', index_col='id')
sum_vectors_cast     = pd.read_csv('data/cast_sum_vectors.csv', index_col='id')
sum_vectors_crew     = pd.read_csv('data/crew_sum_vectors.csv', index_col='id')

# Genres cosine similarity
# NOTE(review): this is the only file read from 'DRAFT/data/' rather than 'data/' —
# confirm the path is intentional.
genres_cs = pd.read_csv('DRAFT/data/cosine_similarity_genres.csv', index_col=0)

## Movie selection

In [3]:
# Titles the user already likes; passed to the recommender below.
like = ['From Dusk Till Dawn', 'Alien', 'The Descent']

In [17]:
# Rows of `movies` whose title is one of the liked titles.
like_df = movies.loc[movies['title'].isin(like)]

In [4]:
def i_want_to_watch_something_like(like:list)->pd.DataFrame:
    """Return the rows of the notebook-level ``movies`` frame whose title is in ``like``.

    Parameters
    ----------
    like : list
        Movie titles to look up (matched against the ``title`` column).

    Returns
    -------
    pd.DataFrame
        The matching rows of ``movies``; empty when no title matches.
    """
    title_is_liked = movies['title'].isin(like)
    return movies.loc[title_is_liked]

In [5]:
# Display the `movies` rows matching the liked titles.
i_want_to_watch_something_like(like)

Unnamed: 0_level_0,genre_ids,overview,popularity,release_date,title,vote_average,vote_count,genres,cast,crew,keywords,label,label_encoder
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
348,"[27, 878]","During its return to the earth, commercial spa...",62.215,1979-05-25,Alien,8.1,12914,"Horror, Science Fiction","[{'cast_id': 10205, 'name': 'Sigourney Weaver'...","[{'crew_id': 578, 'name': 'Ridley Scott', 'dep...","[{'id': 803, 'name': 'android'}, {'id': 1612, ...",Science Fiction,14
755,"[27, 28, 53, 80]",Seth Gecko and his younger brother Richard are...,27.675,1996-01-19,From Dusk Till Dawn,7.0,5301,"Horror, Action, Thriller, Crime","[{'cast_id': 1461, 'name': 'George Clooney', '...","[{'crew_id': 2294, 'name': 'Robert Rodriguez',...","[{'id': 246, 'name': 'dancing'}, {'id': 380, '...",Horror,10
9392,"[12, 27]","After a tragic accident, six friends reunite f...",22.417,2005-07-08,The Descent,7.0,3394,"Adventure, Horror","[{'cast_id': 57574, 'name': 'Shauna Macdonald'...","[{'crew_id': 558, 'name': 'David Julyan', 'dep...","[{'id': 1852, 'name': 'mutant'}, {'id': 1963, ...",Horror,10


In [34]:
class IWantToWatchSomethingLike:
    """Find candidate movies that resemble a list of liked titles.

    The class reads the notebook-level frames ``movies``,
    ``sum_vectors_keywords``, ``sum_vectors_cast`` and ``sum_vectors_crew``;
    they must be loaded before an instance is created.

    Attributes set by the constructor:
        like              -- the list of liked titles as given by the caller.
        like_df           -- rows of ``movies`` whose title is in ``like``.
        min_rating        -- lowest ``vote_average`` among liked movies minus
                             ``RATING_TOLERANCE``.
        similar_movies    -- candidate rows of ``movies`` (see
                             ``get_similar_movies``).
        cosine_similarity -- dict with keys 'keywords', 'cast', 'crew'; each
                             value is a DataFrame with one row per candidate
                             id and one column per liked title.
    """

    # How far below the lowest-rated liked movie a candidate's rating may fall.
    RATING_TOLERANCE = 0.2

    def __init__(self, like:list):
        """Select the liked rows, the candidates, and compute all similarities.

        Parameters
        ----------
        like : list
            Titles of movies the user likes (matched against ``movies['title']``).
        """
        self.like = like
        self.like_df = movies[movies['title'].isin(self.like)]
        self.gather_like_info()
        self.similar_movies = self.get_similar_movies()
        # Placeholders; replaced by get_cosine_similarity() just below.
        self.cosine_similarity = {
            'keywords': pd.DataFrame([]),
            'cast'    : pd.DataFrame([]),
            'crew'    : pd.DataFrame([]),
        }
        self.get_cosine_similarity()

    def _similarity_frame(self, sum_vectors:pd.DataFrame)->pd.DataFrame:
        """Cosine similarity of every candidate (rows) to every liked movie (columns).

        ``sum_vectors`` must contain a row for each id in ``like_df`` and
        ``similar_movies``; ``.loc`` raises ``KeyError`` otherwise.
        """
        liked_ids = self.like_df.index
        candidate_ids = self.similar_movies.index
        if len(liked_ids) == 0 or len(candidate_ids) == 0:
            # Nothing to compare (e.g. no title matched): return an empty,
            # correctly labelled frame instead of passing zero-row input on.
            return pd.DataFrame(
                index=candidate_ids,
                columns=self.like_df['title'],
                dtype=float
            )
        scores = cosine_similarity(
            sum_vectors.loc[liked_ids],
            sum_vectors.loc[candidate_ids]
        )
        # scores is (liked x candidates); transpose so candidates are the rows.
        return pd.DataFrame(
            index=candidate_ids,
            columns=self.like_df['title'],
            data=scores.transpose()
        )

    def get_cosine_similarity(self)->dict:
        """Fill ``self.cosine_similarity`` for keywords, cast and crew and return it.

        Each entry is computed from its own vector table; previously the cast
        and crew entries were built from the keyword scores by mistake.
        """
        self.cosine_similarity['keywords'] = self._similarity_frame(sum_vectors_keywords)
        self.cosine_similarity['cast'] = self._similarity_frame(sum_vectors_cast)
        self.cosine_similarity['crew'] = self._similarity_frame(sum_vectors_crew)
        return self.cosine_similarity

    def get_similar_movies(self)->pd.DataFrame:
        """Return candidate rows of ``movies``.

        A candidate is not one of this instance's liked titles, has
        ``vote_average >= self.min_rating``, and has a ``label`` that occurs
        among the liked movies.
        """
        # Use the instance's own list, not a notebook-level `like` variable.
        not_already_liked = ~movies['title'].isin(self.like)
        rated_high_enough = movies['vote_average'] >= self.min_rating
        shares_label = movies['label'].isin(self.like_df['label'].to_list())
        return movies[not_already_liked & rated_high_enough & shares_label]

    def gather_like_info(self)->None:
        """Derive ``self.min_rating`` from the liked movies.

        When no title matched, the minimum of the empty column is NaN, so the
        rating filter in ``get_similar_movies`` selects nothing.
        """
        self.min_rating = self.like_df['vote_average'].min() - self.RATING_TOLERANCE

    def show_what_i_like(self)->pd.DataFrame:
        """Return the rows of ``movies`` matched by the liked titles."""
        return self.like_df

In [35]:
# Build the recommender for the liked titles; the constructor selects the
# candidate movies and computes their cosine similarities right away.
what_to_watch = IWantToWatchSomethingLike(like)

# Show the `movies` rows matched by the liked titles.
what_to_watch.show_what_i_like()

Unnamed: 0_level_0,genre_ids,overview,popularity,release_date,title,vote_average,vote_count,genres,cast,crew,keywords,label,label_encoder
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
348,"[27, 878]","During its return to the earth, commercial spa...",62.215,1979-05-25,Alien,8.1,12914,"Horror, Science Fiction","[{'cast_id': 10205, 'name': 'Sigourney Weaver'...","[{'crew_id': 578, 'name': 'Ridley Scott', 'dep...","[{'id': 803, 'name': 'android'}, {'id': 1612, ...",Science Fiction,14
755,"[27, 28, 53, 80]",Seth Gecko and his younger brother Richard are...,27.675,1996-01-19,From Dusk Till Dawn,7.0,5301,"Horror, Action, Thriller, Crime","[{'cast_id': 1461, 'name': 'George Clooney', '...","[{'crew_id': 2294, 'name': 'Robert Rodriguez',...","[{'id': 246, 'name': 'dancing'}, {'id': 380, '...",Horror,10
9392,"[12, 27]","After a tragic accident, six friends reunite f...",22.417,2005-07-08,The Descent,7.0,3394,"Adventure, Horror","[{'cast_id': 57574, 'name': 'Shauna Macdonald'...","[{'crew_id': 558, 'name': 'David Julyan', 'dep...","[{'id': 1852, 'name': 'mutant'}, {'id': 1963, ...",Horror,10


In [36]:
# Keyword similarity table: one row per candidate movie id, one column per liked title.
what_to_watch.cosine_similarity['keywords']

title,Alien,From Dusk Till Dawn,The Descent
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
667538,0.870795,0.775327,0.845288
447365,0.869289,0.757707,0.823725
76600,0.857724,0.932551,0.965158
713704,0.780369,0.967760,0.977525
758323,0.686556,0.944765,0.916787
...,...,...,...
473072,0.979777,0.688127,0.764800
11482,0.607563,0.914675,0.853904
313106,0.979590,0.636905,0.785310
419639,0.909650,0.882491,0.884897
