# Movie Recommender System

### Import Libraries and Load Data

In [105]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
import re
import string
import random 
import requests
import contractions

# from scipy import sparse
from sklearn.metrics.pairwise import pairwise_distances, cosine_distances, cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn import metrics 
from sklearn.neighbors import NearestNeighbors

from scipy.sparse import csr_matrix
from scipy.sparse.linalg import *
from scipy.sparse.linalg import svds

from sparsesvd import sparsesvd

from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize


%matplotlib inline

pd.set_option("display.max_columns",350)

In [106]:
df = pd.read_csv('./data/df_to_preprocess.csv')
df.drop(columns = 'Unnamed: 0', inplace = True)
df.head()

Unnamed: 0,rt_id,critic_name,publisher,review_type,review_date,review_content,title,plot,critics_consensus,content_rating,genre,director,writer,cast,original_release_date,streaming_release_date,runtime,production_company,tomatometer_status,tomatometer_rating,tomatometer_count,audience_status,audience_rating,audience_count,critic_id,review_score,year,imdb_title,country,imdb_plot,imdb_score,imdb_count,review_negative,review_neutral,review_positive,review_compound,consensus_negative,consensus_neutral,consensus_positive,consensus_compound,score
0,m/0814255,Ben McEachen,Sunday Mail (Australia),1,2010-02-09,Whether audiences will get behind The Lightnin...,Percy Jackson & the Olympians: The Lightning T...,"Always trouble-prone, the life of teenager Per...",Though it may seem like just another Harry Pot...,PG,"Action & Adventure, Comedy, Drama, Science Fic...",Chris Columbus,"Craig Titley, Chris Columbus, Rick Riordan","Logan Lerman, Brandon T. Jackson, Alexandra Da...",2010-02-12,2015-11-25,119.0,20th Century Fox,0,49.0,149.0,0,53.0,254421.0,1022,70.0,2010,tt0814255,"UK, Canada, USA",A teenager discovers he's the descendant of a ...,59.0,174198.0,0.08,0.727,0.193,0.705,0.0,0.59,0.41,0.9274,53.95
1,m/0814255,Nick Schager,Slant Magazine,0,2010-02-10,Harry Potter knockoffs don't come more transpa...,Percy Jackson & the Olympians: The Lightning T...,"Always trouble-prone, the life of teenager Per...",Though it may seem like just another Harry Pot...,PG,"Action & Adventure, Comedy, Drama, Science Fic...",Chris Columbus,"Craig Titley, Chris Columbus, Rick Riordan","Logan Lerman, Brandon T. Jackson, Alexandra Da...",2010-02-12,2015-11-25,119.0,20th Century Fox,0,49.0,149.0,0,53.0,254421.0,7475,25.0,2010,tt0814255,"UK, Canada, USA",A teenager discovers he's the descendant of a ...,59.0,174198.0,0.0,1.0,0.0,0.0,0.0,0.59,0.41,0.9274,53.95
2,m/0878835,Erik Childress,eFilmCritic.com,1,2010-01-31,Holofcener always gives us more to chew on tha...,Please Give,Kate (Catherine Keener) and her husband Alex (...,Nicole Holofcener's newest might seem slight i...,R,Comedy,Nicole Holofcener,Nicole Holofcener,"Catherine Keener, Amanda Peet, Oliver Platt, R...",2010-04-30,2012-09-04,90.0,Sony Pictures Classics,1,87.0,142.0,1,64.0,11574.0,3177,75.0,2010,tt0878835,USA,"In New York City, a husband and wife butt head...",66.0,10928.0,0.0,0.928,0.072,0.4767,0.0,0.885,0.115,0.5023,73.53
3,m/0878835,Richard Mowe,Boxoffice Magazine,1,2010-03-24,"The tone is low-key but very funny. Yet, at th...",Please Give,Kate (Catherine Keener) and her husband Alex (...,Nicole Holofcener's newest might seem slight i...,R,Comedy,Nicole Holofcener,Nicole Holofcener,"Catherine Keener, Amanda Peet, Oliver Platt, R...",2010-04-30,2012-09-04,90.0,Sony Pictures Classics,1,87.0,142.0,1,64.0,11574.0,8239,60.0,2010,tt0878835,USA,"In New York City, a husband and wife butt head...",66.0,10928.0,0.0,0.816,0.184,0.6474,0.0,0.885,0.115,0.5023,73.53
4,m/10,Scott Weinberg,eFilmCritic.com,1,2002-07-25,Obvious but entertaining portrayal of midlife ...,10,"A successful, middle-aged Hollywood songwriter...",Blake Edwards' bawdy comedy may not score a pe...,R,"Comedy, Romance",Blake Edwards,Blake Edwards,"Dudley Moore, Bo Derek, Julie Andrews, Robert ...",1979-10-05,2014-07-24,122.0,Waner Bros.,1,67.0,24.0,0,53.0,14684.0,8874,80.0,1979,tt0078721,USA,A Hollywood composer goes through a mid-life c...,61.0,14946.0,0.486,0.332,0.182,-0.8126,0.262,0.547,0.191,-0.4676,60.03


In [107]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13173 entries, 0 to 13172
Data columns (total 41 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   rt_id                   13173 non-null  object 
 1   critic_name             13173 non-null  object 
 2   publisher               13173 non-null  object 
 3   review_type             13173 non-null  int64  
 4   review_date             13173 non-null  object 
 5   review_content          13173 non-null  object 
 6   title                   13173 non-null  object 
 7   plot                    13173 non-null  object 
 8   critics_consensus       13173 non-null  object 
 9   content_rating          13173 non-null  object 
 10  genre                   13173 non-null  object 
 11  director                13173 non-null  object 
 12  writer                  13173 non-null  object 
 13  cast                    13173 non-null  object 
 14  original_release_date   13173 non-null

# Pre-Process for Text Recommender

In [108]:
# Instantiating
tokenizer = word_tokenize
stop_words = stopwords
stemmer = WordNetLemmatizer()

In [109]:
df.head(1)

Unnamed: 0,rt_id,critic_name,publisher,review_type,review_date,review_content,title,plot,critics_consensus,content_rating,genre,director,writer,cast,original_release_date,streaming_release_date,runtime,production_company,tomatometer_status,tomatometer_rating,tomatometer_count,audience_status,audience_rating,audience_count,critic_id,review_score,year,imdb_title,country,imdb_plot,imdb_score,imdb_count,review_negative,review_neutral,review_positive,review_compound,consensus_negative,consensus_neutral,consensus_positive,consensus_compound,score
0,m/0814255,Ben McEachen,Sunday Mail (Australia),1,2010-02-09,Whether audiences will get behind The Lightnin...,Percy Jackson & the Olympians: The Lightning T...,"Always trouble-prone, the life of teenager Per...",Though it may seem like just another Harry Pot...,PG,"Action & Adventure, Comedy, Drama, Science Fic...",Chris Columbus,"Craig Titley, Chris Columbus, Rick Riordan","Logan Lerman, Brandon T. Jackson, Alexandra Da...",2010-02-12,2015-11-25,119.0,20th Century Fox,0,49.0,149.0,0,53.0,254421.0,1022,70.0,2010,tt0814255,"UK, Canada, USA",A teenager discovers he's the descendant of a ...,59.0,174198.0,0.08,0.727,0.193,0.705,0.0,0.59,0.41,0.9274,53.95


In [110]:
# Noticed this movie shows up in a lot of results. Looking at genre it's listed as everything. 
# Going to update to sci-fi & comedy
df[df['title'] == 'Mystery Science Theater 3000: The Movie']

Unnamed: 0,rt_id,critic_name,publisher,review_type,review_date,review_content,title,plot,critics_consensus,content_rating,genre,director,writer,cast,original_release_date,streaming_release_date,runtime,production_company,tomatometer_status,tomatometer_rating,tomatometer_count,audience_status,audience_rating,audience_count,critic_id,review_score,year,imdb_title,country,imdb_plot,imdb_score,imdb_count,review_negative,review_neutral,review_positive,review_compound,consensus_negative,consensus_neutral,consensus_positive,consensus_compound,score
8095,m/mystery_science_theater_3000_the_movie,Barry Walters,San Francisco Examiner,1,2000-01-01,It doesn't take much imagination to poke fun a...,Mystery Science Theater 3000: The Movie,Dr. Clayton Forrester (Trace Beaulieu) figures...,Mystery Science Theater 3000: The Movie may be...,PG-13,"Comedy, Science Fiction & Fantasy, Special Int...",Jim Mallon,"Michael J. Nelson, Mary Jo Pehl, Bridget Jones...","Michael J. Nelson, Trace Beaulieu, Jim Mallon,...",1996-04-19,2014-03-18,73.0,MCA Universal Home Video,1,80.0,55.0,1,88.0,26936.0,975,62.5,1996,tt0117128,USA,Mike Nelson and his robot companions watch and...,73.0,17539.0,0.113,0.686,0.201,0.3506,0.0,0.754,0.246,0.8462,76.71
8096,m/mystery_science_theater_3000_the_movie,Susan Wloszczyna,USA Today,1,2000-01-01,From the opening spoof of 2001: A Space Odysse...,Mystery Science Theater 3000: The Movie,Dr. Clayton Forrester (Trace Beaulieu) figures...,Mystery Science Theater 3000: The Movie may be...,PG-13,"Comedy, Science Fiction & Fantasy, Special Int...",Jim Mallon,"Michael J. Nelson, Mary Jo Pehl, Bridget Jones...","Michael J. Nelson, Trace Beaulieu, Jim Mallon,...",1996-04-19,2014-03-18,73.0,MCA Universal Home Video,1,80.0,55.0,1,88.0,26936.0,9400,75.0,1996,tt0117128,USA,Mike Nelson and his robot companions watch and...,73.0,17539.0,0.0,0.803,0.197,0.6597,0.0,0.754,0.246,0.8462,76.71


I could do this with a bunch of movies, but don't have the time to. Something for next steps.

In [111]:
# Setting up custom stopwords + english
english = stopwords.words('english')
english = set(english)
my_stop_words = ['maybe','like','kind of','sort of','similar to', 'want', 'watch','content',
                'critics','review','movie','lacks','perfectly acceptable', 'consensus','best',
                 'excellent', "'d", "'ll", "'re", "'s", 'want','watch','I want to watch',
                 "'ve", 'could', 'might', 'must', "n't", 'need', 'sha', 'wo', 
                 'would', 'acceptable', 'kind', 'perfectly', 'similar', 'sort','i','of','to']
my_stop_words = set(my_stop_words)
new_stop_words = english.union(my_stop_words)

In [112]:
# Text column 
def create_text(x):
    return (''.join(x['title']) + ' ' + ''.join(x['plot']) + ' ' + ''.join(x['critics_consensus']) + ' ' + ''.join(x['review_content'])
            + ' ' + ''.join(x['genre']) + ' ' + ''.join(x['director']) + ' ' + ''.join(x['cast'] + ' ' + ''.join(x['imdb_plot'])))

df['text'] = df.apply(create_text, axis = 1)

df[['text']].head(2)

Unnamed: 0,text
0,Percy Jackson & the Olympians: The Lightning T...
1,Percy Jackson & the Olympians: The Lightning T...


In [113]:
def clean_text(text):
    # Set lowercase
    text = text.lower()
    
    # Expand contractions
    text = contractions.fix(text)

    # Remove punctuation
    tokenizer = RegexpTokenizer(r'\w+')
    text = tokenizer.tokenize(text)
    
     # Remove english stopwords
    text = [i for i in text if i not in new_stop_words]
    text = ' '.join(text)
    
     # Lemmatize
    text = stemmer.lemmatize(text)
    
    return text
    

In [114]:
test_text = df.head(1)

In [115]:
test_text['text'].apply(clean_text)[0]

'percy jackson olympians lightning thief always trouble prone life teenager percy jackson logan lerman gets lot complicated learns son greek god poseidon training ground children deities percy learns harness divine powers prepare adventure lifetime prevent feud among olympians erupting devastating war earth rescue mother clutches hades god underworld though may seem another harry potter knockoff percy jackson benefits strong supporting cast speedy plot plenty fun greek mythology whether audiences get behind lightning thief hard predict overall entertaining introduction promising new world consuming shadow potter big break free action adventure comedy drama science fiction fantasy chris columbus logan lerman brandon jackson alexandra daddario jake abel sean bean pierce brosnan steve coogan rosario dawson melina kanakaredes catherine keener kevin mckidd joe pantoliano uma thurman ray winstone julian richings bonita friedericy annie ilonzeh tania saulnier marie avgeropoulos luisa oliveira

In [116]:
df['text'] = df['text'].apply(clean_text)

In [117]:
# Saving df to use with flask 
df.to_csv('./website/assets/df_for_flask.csv')

# Content Based Recommender System

#### TF-IDF

The code below has been adapted from Betty LD's post on Towards Data Science [[1]](https://towardsdatascience.com/build-a-text-recommendation-system-with-python-e8b95d9f251c)

In [118]:
cos = {}
def get_idx(matrix):
    # returns sum of all tokens cosines for each sentence
    cos_sim = np.mean(matrix, axis=0)
    # Ranking index from highest to smallest
    index = np.argsort(cos_sim)[::-1]
    # Returning an array of cosine similarity shape, filled with 1s
    mask = np.ones(len(cos_sim))
    # Setting up truth value to ensure arrays are same shape
    mask = np.logical_or(cos_sim[index], mask)
    # Assigning index
    best_idx = index[mask][:35]
    cos['cosine'] = cos_sim
    return best_idx


# Get recomendations
def recs_tfidf(text, tfidf_matrix):
    
    # Get tokens
    letters_only = re.sub("[^a-zA-Z]",  " ", str(text))

    tokens = [str(t) for t in tokenizer(letters_only)]
    # Vectorize text
    text = vectorizer.transform(tokens)
    # Create list with similarity between text and dataset
    matrix = cosine_similarity(text, tfidf_matrix)
    best_idx = get_idx(matrix)
    return best_idx

def get_final(text, new_rec):
    if 'drama' in text.lower():
        mask = new_rec['genre'].str.contains('Drama' or 'Romance', case=False, na=False)
        new_rec = new_rec[mask]
        new_rec = new_rec.sort_values(by = ['score'], ascending = False)[:20]
    elif 'comedy' in text.lower() or 'slapstick' in text.lower():
        mask = new_rec['genre'].str.contains('Comedy', case=False, na=False)
        new_rec = new_rec[mask]
        new_rec = new_rec.sort_values(by = ['score'], ascending = False)[:20]
    elif 'horror' in text.lower() or 'scary' in text.lower():
        mask = new_rec['genre'].str.contains('Horror', case=False, na=False)
        new_rec = new_rec[mask]
        new_rec = new_rec.sort_values(by = ['score'], ascending = False)[:20]
    elif 'action' in text.lower() or 'adventure' in text.lower():
        mask = new_rec['genre'].str.contains('Action & Adventure', case=False, na=False)
        new_rec = new_rec[mask]
        new_rec = new_rec.sort_values(by = ['score'], ascending = False)[:20]
    elif 'mystery' in text.lower() or 'suspense' in text.lower():
        mask = new_rec['genre'].str.contains('Mystery & Suspense', case=False, na=False)
        new_rec = new_rec[mask]
        new_rec = new_rec.sort_values(by = ['score'], ascending = False)[:20]
    elif 'international' in text.lower() or 'art' in text.lower():
        mask = new_rec['genre'].str.contains('Art House & International', case=False, na=False)
        new_rec = new_rec[mask]
        new_rec = new_rec.sort_values(by = ['score'], ascending = False)[:20]
    elif 'romantic' in text.lower() or 'romance' in text.lower():
        mask = new_rec['genre'].str.contains('Romance', case=False, na=False)
        new_rec = new_rec[mask]
        new_rec = new_rec.sort_values(by = ['score'], ascending = False)[:20]
    elif 'classic' in text.lower():
        mask = new_rec['genre'].str.contains('Classic', case=False, na=False)
        new_rec = new_rec[mask]
        new_rec = new_rec.sort_values(by = ['score'], ascending = False)[:20]
    else:
        new_rec
    return new_rec

def get_recs(text):
    best_idx = recs_tfidf(text, tfidf_matrix)
    cosine = pd.DataFrame(cos)
    new_rec = (df[['title','plot','genre','score','tomatometer_rating','audience_rating','imdb_score']].iloc[best_idx])
    new_rec = pd.concat([new_rec, cosine], axis = 1)
    new_rec = new_rec.sort_values(by = ['score'], ascending = False)
    new_rec.drop_duplicates(subset = 'title', keep = 'first')
    new_rec = get_final(text,new_rec)
    return new_rec

def best_rec(text):
    best_rec = get_recs(text)
    best_rec = best_rec.drop_duplicates(subset = 'title')
    best_rec = best_rec[:10]
    best_rec = best_rec.reset_index()
    best_rec.drop(columns = 'index', inplace = True)
    return best_rec

# Fit TFIDF
vectorizer = TfidfVectorizer(stop_words = new_stop_words, lowercase = True, tokenizer = tokenizer, ngram_range = (1,4))
tfidf_matrix = vectorizer.fit_transform(df['text'].values)

In [119]:
text = 'Christmas' 
rec = best_rec(text)
rec

Unnamed: 0,title,plot,genre,score,tomatometer_rating,audience_rating,imdb_score,cosine
0,The Nightmare Before Christmas,The film follows the misadventures of Jack Ske...,"Animation, Kids & Family, Science Fiction & Fa...",87.68,95.0,91.0,80.0,0.158822
1,A Christmas Story,Based on the humorous writings of author Jean ...,"Comedy, Drama, Kids & Family",86.92,89.0,88.0,79.0,0.167427
2,White Christmas,Singers Bob Wallace (Bing Crosby) and Phil Dav...,"Classics, Kids & Family, Musical & Performing ...",77.26,77.0,88.0,76.0,0.09175
3,National Lampoon's Christmas Vacation,"As the holidays approach, Clark Griswold (Chev...",Comedy,73.7,67.0,86.0,76.0,0.121099
4,The Man Who Invented Christmas,"In 1843 London, author Charles Dickens finds h...","Comedy, Drama",72.62,79.0,77.0,70.0,0.20844
5,The Christmas Chronicles,Siblings Kate and Teddy Pierce hatch a scheme ...,"Comedy, Kids & Family",68.81,67.0,77.0,71.0,0.108325
6,Get Santa,"After crashing his sleigh, Santa Claus (Jim Br...","Comedy, Kids & Family",64.08,79.0,53.0,62.0,0.098729
7,The Night Before,"For the last 10 years, lifelong buddies Ethan ...",Comedy,61.76,68.0,61.0,64.0,0.110487
8,A Christmas Prince,Christmas comes early for an aspiring young jo...,"Comedy, Drama, Romance",59.5,73.0,47.0,58.0,0.119329
9,The Grinch,"The Grinch and his loyal dog, Max, live a soli...","Animation, Kids & Family, Science Fiction & Fa...",58.48,60.0,50.0,63.0,0.084127


In [120]:
text = 'I’m in the mood for a gut-wrenching dramatic movie' 
rec = best_rec(text)
rec

Unnamed: 0,title,plot,genre,score,tomatometer_rating,audience_rating,imdb_score,cosine
0,Schindler's List,Businessman Oskar Schindler (Liam Neeson) arri...,Drama,93.96,97.0,97.0,89.0,0.003704
1,The Wrestler,"Aging wrestler Randy ""The Ram"" Robinson (Micke...",Drama,84.78,98.0,88.0,79.0,0.003286
2,127 Hours,"While exploring a remote canyon in Utah, mount...",Drama,84.03,93.0,85.0,75.0,0.006242
3,The Attack,An award-winning Palestinian surgeon (Ali Suli...,Drama,77.9,87.0,78.0,71.0,0.004169
4,Without Limits,Before Steve Prefontaine (Billy Crudup) makes ...,"Documentary, Drama",77.68,79.0,91.0,72.0,0.003263
5,Detroit,"In the summer of 1967, rioting and civil unres...","Drama, Mystery & Suspense",77.07,82.0,79.0,73.0,0.00614
6,Macbeth,Proud Macbeth (Jon Finch) and his power-mad wi...,Drama,76.96,86.0,73.0,51.0,0.005325
7,The Pool,A wealthy family fascinates a working-class yo...,"Art House & International, Comedy, Drama",76.76,96.0,73.0,55.0,0.003291
8,The Body Remembers When the World Broke Open,"After a chance encounter on the street, a woma...",Drama,76.61,97.0,58.0,68.0,0.00363
9,Thirteen,Honor student Tracy Freeland (Evan Rachel Wood...,Drama,75.63,81.0,77.0,68.0,0.003286


In [121]:
text = 'College or High School Comedy' 
rec = best_rec(text)
rec

Unnamed: 0,title,plot,genre,score,tomatometer_rating,audience_rating,imdb_score,cosine
0,Dazed and Confused,This coming-of-age film follows the mayhem of ...,Comedy,84.78,92.0,90.0,76.0,0.04032
1,Election,"Jim McAllister (Matthew Broderick), a well-lik...",Comedy,80.65,92.0,79.0,72.0,0.027406
2,The Spectacular Now,"An innocent, bookish teenager (Shailene Woodle...","Comedy, Drama, Romance",78.83,91.0,76.0,71.0,0.030312
3,Grosse Pointe Blank,After assassin Martin Blank (John Cusack) has ...,"Action & Adventure, Comedy, Drama, Mystery & S...",78.13,80.0,87.0,73.0,0.025321
4,Easy A,Prompted by her popular best friend to spill d...,"Comedy, Romance",75.92,85.0,77.0,70.0,0.024066
5,Colma: The Musical,"Three friends (Jake Moreno, H.P. Mendoza, L.A....","Comedy, Drama, Musical & Performing Arts",74.31,90.0,69.0,64.0,0.030184
6,World's Greatest Dad,When the son of high school English teacher La...,"Comedy, Drama",73.36,88.0,67.0,69.0,0.024315
7,Hair High,"As the prom approaches, the head cheerleader (...","Animation, Comedy, Horror, Romance",69.98,75.0,73.0,68.0,0.034999
8,Peggy Sue Got Married,Peggy Sue Bodell (Kathleen Turner) attends her...,"Comedy, Drama, Science Fiction & Fantasy, Romance",68.41,86.0,55.0,63.0,0.023869
9,My Entire High School Sinking Into the Sea,High school sophomores Dash and Assaf are best...,"Animation, Comedy, Drama",67.31,85.0,56.0,60.0,0.043115


In [122]:
text = 'Scorsese' 
rec = best_rec(text)
rec

Unnamed: 0,title,plot,genre,score,tomatometer_rating,audience_rating,imdb_score,cosine
0,Taxi Driver,"Suffering from insomnia, disturbed loner Travi...","Drama, Mystery & Suspense",90.37,96.0,93.0,83.0,0.08431
1,Raging Bull,The story of a middleweight boxer as he rises ...,Drama,89.64,93.0,93.0,82.0,0.107518
2,The Departed,South Boston cop Billy Costigan (Leonardo DiCa...,"Action & Adventure, Drama, Mystery & Suspense",88.59,91.0,94.0,85.0,0.079855
3,Mean Streets,A slice of street life in Little Italy among l...,"Classics, Drama",86.03,95.0,84.0,73.0,0.128586
4,The King of Comedy,Rupert Pupkin (Robert De Niro) is a failure in...,"Comedy, Drama, Mystery & Suspense",85.83,89.0,90.0,78.0,0.129663
5,After Hours,"In a Manhattan cafe, word processor Paul Hacke...","Comedy, Mystery & Suspense",84.01,89.0,87.0,77.0,0.117859
6,Casino,"In early-1970s Las Vegas, low-level mobster Sa...",Drama,82.9,80.0,93.0,82.0,0.06446
7,Alice Doesn't Live Here Anymore,"After her husband dies, Alice (Ellen Burstyn) ...","Classics, Drama",81.02,88.0,82.0,73.0,0.094108
8,The Wolf of Wall Street,"In 1987, Jordan Belfort (Leonardo DiCaprio) ta...",Drama,81.0,80.0,83.0,82.0,0.056793
9,The Last Temptation of Christ,"Jesus (Willem Dafoe), a humble Judean carpente...",Drama,80.2,81.0,84.0,75.0,0.056076


### CountVectorizer

In [123]:
cos = {}
def get_idx(matrix):
    # returns sum of all tokens cosines for each sentence
    cos_sim = np.mean(matrix, axis=0)
    # Ranking index from highest to smallest
    index = np.argsort(cos_sim)[::-1]
    # Returning an array of cosine similarity shape, filled with 1s
    mask = np.ones(len(cos_sim))
    # Setting up truth value to ensure arrays are same shape
    mask = np.logical_or(cos_sim[index], mask)
    # Assigning index
    best_idx = index[mask][:35]
    cos['cosine'] = cos_sim
    return best_idx


# Get recomendations
def recs_cvec(text, cvec_matrix):
    # Get tokens
    letters_only = re.sub("[^a-zA-Z]",  " ", str(text))
    tokens = [str(t) for t in tokenizer(letters_only)]
    # Vectorize text
    text = vectorizer.transform(tokens)
    # Create list with similarity between text and dataset
    matrix = cosine_similarity(text, cvec_matrix)
    best_idx = get_idx(matrix)
    return best_idx

def get_final(text, new_rec):
    if 'drama' in text.lower():
        mask = new_rec['genre'].str.contains('Drama' or 'Romance', case=False, na=False)
        new_rec = new_rec[mask]
        new_rec = new_rec.sort_values(by = ['score'], ascending = False)[:20]
    elif 'comedy' in text.lower() or 'slapstick' in text.lower():
        mask = new_rec['genre'].str.contains('Comedy', case=False, na=False)
        new_rec = new_rec[mask]
        new_rec = new_rec.sort_values(by = ['score'], ascending = False)[:20]
    elif 'horror' in text.lower() or 'scary' in text.lower():
        mask = new_rec['genre'].str.contains('Horror', case=False, na=False)
        new_rec = new_rec[mask]
        new_rec = new_rec.sort_values(by = ['score'], ascending = False)[:20]
    elif 'action' in text.lower() or 'adventure' in text.lower():
        mask = new_rec['genre'].str.contains('Action & Adventure', case=False, na=False)
        new_rec = new_rec[mask]
        new_rec = new_rec.sort_values(by = ['score'], ascending = False)[:20]
    elif 'mystery' in text.lower() or 'suspense' in text.lower():
        mask = new_rec['genre'].str.contains('Mystery & Suspense', case=False, na=False)
        new_rec = new_rec[mask]
        new_rec = new_rec.sort_values(by = ['score'], ascending = False)[:20]
    elif 'international' in text.lower() or 'art' in text.lower():
        mask = new_rec['genre'].str.contains('Art House & International', case=False, na=False)
        new_rec = new_rec[mask]
        new_rec = new_rec.sort_values(by = ['score'], ascending = False)[:20]
    elif 'romantic' in text.lower() or 'romance' in text.lower():
        mask = new_rec['genre'].str.contains('Romance', case=False, na=False)
        new_rec = new_rec[mask]
        new_rec = new_rec.sort_values(by = ['score'], ascending = False)[:20]
    elif 'classic' in text.lower():
        mask = new_rec['genre'].str.contains('Classic', case=False, na=False)
        new_rec = new_rec[mask]
        new_rec = new_rec.sort_values(by = ['score'], ascending = False)[:20]
    else:
        new_rec
    return new_rec

def get_recs(text):
    best_idx = recs_tfidf(text, cvec_matrix)
    cosine = pd.DataFrame(cos)
    new_rec = (df[['title','plot','genre','score','tomatometer_rating','audience_rating','imdb_score']].iloc[best_idx])
    new_rec = pd.concat([new_rec, cosine], axis = 1)
    new_rec = new_rec.sort_values(by = ['score'], ascending = False)
    new_rec.drop_duplicates(subset = 'title', keep = 'first')
    new_rec = get_final(text,new_rec)
    return new_rec

def best_rec(text):
    best_rec = get_recs(text)
    best_rec = best_rec.drop_duplicates(subset = 'title')
    best_rec = best_rec[:10]
    best_rec = best_rec.reset_index()
    best_rec.drop(columns = 'index', inplace = True)
    return best_rec

# Fit Cvec
vectorizer = CountVectorizer(stop_words = 'english', lowercase = True, tokenizer = tokenizer, ngram_range = (1,4)) 
cvec_matrix = vectorizer.fit_transform(df['text'].values)

In [124]:
text = 'Christmas' 
rec = best_rec(text)
rec

Unnamed: 0,title,plot,genre,score,tomatometer_rating,audience_rating,imdb_score,cosine
0,The Nightmare Before Christmas,The film follows the misadventures of Jack Ske...,"Animation, Kids & Family, Science Fiction & Fa...",87.68,95.0,91.0,80.0,0.214373
1,A Christmas Story,Based on the humorous writings of author Jean ...,"Comedy, Drama, Kids & Family",86.92,89.0,88.0,79.0,0.262336
2,White Christmas,Singers Bob Wallace (Bing Crosby) and Phil Dav...,"Classics, Kids & Family, Musical & Performing ...",77.26,77.0,88.0,76.0,0.14825
3,National Lampoon's Christmas Vacation,"As the holidays approach, Clark Griswold (Chev...",Comedy,73.7,67.0,86.0,76.0,0.192879
4,The Man Who Invented Christmas,"In 1843 London, author Charles Dickens finds h...","Comedy, Drama",72.62,79.0,77.0,70.0,0.322886
5,The Christmas Chronicles,Siblings Kate and Teddy Pierce hatch a scheme ...,"Comedy, Kids & Family",68.81,67.0,77.0,71.0,0.173097
6,Get Santa,"After crashing his sleigh, Santa Claus (Jim Br...","Comedy, Kids & Family",64.08,79.0,53.0,62.0,0.157243
7,The Princess Switch,"One week before Christmas, a duchess switches ...",Romance,62.25,75.0,48.0,60.0,0.122628
8,The Night Before,"For the last 10 years, lifelong buddies Ethan ...",Comedy,61.76,68.0,61.0,64.0,0.175035
9,A Christmas Prince,Christmas comes early for an aspiring young jo...,"Comedy, Drama, Romance",59.5,73.0,47.0,58.0,0.197546


In [125]:
text = 'I’m in the mood for a gut-wrenching dramatic movie' 
rec = best_rec(text)
rec

Unnamed: 0,title,plot,genre,score,tomatometer_rating,audience_rating,imdb_score,cosine
0,Schindler's List,Businessman Oskar Schindler (Liam Neeson) arri...,Drama,93.96,97.0,97.0,89.0,0.006129
1,127 Hours,"While exploring a remote canyon in Utah, mount...",Drama,84.03,93.0,85.0,75.0,0.007423
2,"Like Father, Like Son",Ryota learns that his biological son was switc...,Drama,78.7,87.0,89.0,62.0,0.005357
3,Boy,A New Zealand youth (James Rolleston) finds th...,"Comedy, Drama",78.27,87.0,86.0,58.0,0.00502
4,The Attack,An award-winning Palestinian surgeon (Ali Suli...,Drama,77.9,87.0,78.0,71.0,0.00493
5,Detroit,"In the summer of 1967, rioting and civil unres...","Drama, Mystery & Suspense",77.07,82.0,79.0,73.0,0.007279
6,Macbeth,Proud Macbeth (Jon Finch) and his power-mad wi...,Drama,76.96,86.0,73.0,75.0,0.005994
7,The Pool,A wealthy family fascinates a working-class yo...,"Art House & International, Comedy, Drama",76.76,96.0,73.0,55.0,0.005394
8,Middle of Nowhere,A med student (Emayatzy Corinealdi) considers ...,Drama,73.21,88.0,64.0,65.0,0.004959
9,Like Someone in Love,An elderly man (Tadashi Okuno) and a student/e...,"Art House & International, Drama",71.71,83.0,60.0,70.0,0.004901


In [126]:
text = 'College or High School Comedy' 
rec = best_rec(text)
rec

Unnamed: 0,title,plot,genre,score,tomatometer_rating,audience_rating,imdb_score,cosine
0,Dazed and Confused,This coming-of-age film follows the mayhem of ...,Comedy,84.78,92.0,90.0,76.0,0.096225
1,Superbad,High-school seniors Seth (Jonah Hill) and Evan...,Comedy,82.18,88.0,87.0,76.0,0.061096
2,Election,"Jim McAllister (Matthew Broderick), a well-lik...",Comedy,80.65,92.0,79.0,72.0,0.068229
3,The Spectacular Now,"An innocent, bookish teenager (Shailene Woodle...","Comedy, Drama, Romance",78.83,91.0,76.0,71.0,0.075048
4,Grosse Pointe Blank,After assassin Martin Blank (John Cusack) has ...,"Action & Adventure, Comedy, Drama, Mystery & S...",78.13,80.0,87.0,73.0,0.064282
5,Easy A,Prompted by her popular best friend to spill d...,"Comedy, Romance",75.92,85.0,77.0,70.0,0.060816
6,Colma: The Musical,"Three friends (Jake Moreno, H.P. Mendoza, L.A....","Comedy, Drama, Musical & Performing Arts",74.31,90.0,69.0,64.0,0.076451
7,World's Greatest Dad,When the son of high school English teacher La...,"Comedy, Drama",73.36,88.0,67.0,69.0,0.068724
8,Hair High,"As the prom approaches, the head cheerleader (...","Animation, Comedy, Horror, Romance",69.98,75.0,73.0,68.0,0.082158
9,My Entire High School Sinking Into the Sea,High school sophomores Dash and Assaf are best...,"Animation, Comedy, Drama",67.31,85.0,56.0,60.0,0.102151


In [127]:
text = 'Scorsese' 
rec = best_rec(text)
rec

Unnamed: 0,title,plot,genre,score,tomatometer_rating,audience_rating,imdb_score,cosine
0,Taxi Driver,"Suffering from insomnia, disturbed loner Travi...","Drama, Mystery & Suspense",90.37,96.0,93.0,83.0,0.108112
1,Raging Bull,The story of a middleweight boxer as he rises ...,Drama,89.64,93.0,93.0,82.0,0.139516
2,The Departed,South Boston cop Billy Costigan (Leonardo DiCa...,"Action & Adventure, Drama, Mystery & Suspense",88.59,91.0,94.0,85.0,0.10056
3,Mean Streets,A slice of street life in Little Italy among l...,"Classics, Drama",86.03,95.0,84.0,73.0,0.164677
4,The King of Comedy,Rupert Pupkin (Robert De Niro) is a failure in...,"Comedy, Drama, Mystery & Suspense",85.83,89.0,90.0,78.0,0.169907
5,After Hours,"In a Manhattan cafe, word processor Paul Hacke...","Comedy, Mystery & Suspense",84.01,89.0,87.0,77.0,0.151402
6,Casino,"In early-1970s Las Vegas, low-level mobster Sa...",Drama,82.9,80.0,93.0,82.0,0.082447
7,Alice Doesn't Live Here Anymore,"After her husband dies, Alice (Ellen Burstyn) ...","Classics, Drama",81.02,88.0,82.0,73.0,0.116598
8,The Wolf of Wall Street,"In 1987, Jordan Belfort (Leonardo DiCaprio) ta...",Drama,81.0,80.0,83.0,82.0,0.072846
9,The Last Temptation of Christ,"Jesus (Willem Dafoe), a humble Judean carpente...",Drama,80.2,81.0,84.0,75.0,0.073821


Results are basically the same, a couple different movie orders, but the CVEC cosine similarity score tends to be higher than the cosine similarity score for TF-IDF

# Collaborative Recommender System

### Sparse Matrix Factorization

Here we are going to be searching for similar movies to a specific title. 

Cosine Distance

In [128]:
# Creating a pivot table
pivot = df.pivot_table(index = 'title', columns = 'critic_id', values = 'review_score')
# Creating a sparse matrix
pivot_sparse = csr_matrix(pivot.fillna(0))
# Getting the cosine distance
recommender = pairwise_distances(pivot_sparse, metric = 'cosine')
# Creating a dataframe with the cosine distance
recommender_df = pd.DataFrame(recommender, index = pivot.index, columns = pivot.index)

In [129]:
q = 'A Christmas Story'

titles = df[df['title'].str.contains(q)]['title']

titles_list = []
for title in titles: 
    if title not in titles_list:
        print(f'The top 10 most recommended movies for {title} are: {recommender_df[title].sort_values(ascending = True)[1:10]}')
    titles_list.append(title)

The top 10 most recommended movies for A Christmas Story are: title
Stalag 17                      0.002221
Casablanca                     0.256706
Charade                        0.256706
Iron Man                       0.256706
Heaven Can Wait                0.256706
Titanic                        0.256706
Around the World in 80 Days    0.256706
The Magnificent Seven          0.256706
Waiting                        0.256706
Name: A Christmas Story, dtype: float64


In [130]:
q = 'Titanic'

titles = df[df['title'].str.contains(q)]['title']

titles_list = []
for title in titles: 
    if title not in titles_list:
        print(f'The top 10 most recommended movies for {title} are: {recommender_df[title].sort_values(ascending = True)[1:10]}')
    titles_list.append(title)

The top 10 most recommended movies for Titanic are: title
Waiting                        0.0
Titanic                        0.0
Iron Man                       0.0
Around the World in 80 Days    0.0
Heaven Can Wait                0.0
The Magnificent Seven          0.0
Blade                          0.0
The Fugitive                   0.0
Casablanca                     0.0
Name: Titanic, dtype: float64


This isn't performing the way I would want my recommender system to work. While these movies might have something in common, it's hard to tie Titanic to A Christmas Story and Iron Man to Titanic. 

Euclidean

In [131]:
# Creating a pivot table
pivot = df.pivot_table(index = 'title', columns = 'critic_id', values = 'review_score')
# Creating a sparse matrix
pivot_sparse = csr_matrix(pivot.fillna(0))
# Getting the euclidean distance
recommender = pairwise_distances(pivot_sparse, metric = 'euclidean')
# Creating a dataframe with the euclidean distance
recommender_df = pd.DataFrame(recommender, index = pivot.index, columns = pivot.index)

In [132]:
q = 'A Christmas Story'

titles = df[df['title'].str.contains(q)]['title']

titles_list = []
for title in titles: 
    if title not in titles_list:
        print(f'The top 10 most recommended movies for {title} are: {recommender_df[title].sort_values(ascending = True)[1:10]}')
    titles_list.append(title)

The top 10 most recommended movies for A Christmas Story are: title
Stalag 17                      12.500000
The Fugitive                   90.000000
The Magnificent Seven          90.000000
Casablanca                     90.000000
Heaven Can Wait                90.000000
Titanic                        90.863909
Iron Man                       90.863909
Charade                        90.863909
Around the World in 80 Days    93.407708
Name: A Christmas Story, dtype: float64


In [133]:
q = 'Titanic'

titles = df[df['title'].str.contains(q)]['title']

titles_list = []
for title in titles: 
    if title not in titles_list:
        print(f'The top 10 most recommended movies for {title} are: {recommender_df[title].sort_values(ascending = True)[1:10]}')
    titles_list.append(title)

The top 10 most recommended movies for Titanic are: title
Titanic                         0.0
Charade                         0.0
Heaven Can Wait                12.5
Waiting                        12.5
Blade                          12.5
Around the World in 80 Days    12.5
Casablanca                     12.5
The Magnificent Seven          12.5
The Fugitive                   12.5
Name: Titanic, dtype: float64


While some of the results are different, this still isn't performing the way I would need it to. 

Trying cosine distance with tomatometer rating instead. 

In [134]:
# Creating a pivot table
pivot = df.pivot_table(index = 'title', columns = 'critic_id', values = 'tomatometer_rating')
# Creating a sparse matrix
pivot_sparse = csr_matrix(pivot.fillna(0))
# Getting the cosine distance
recommender = pairwise_distances(pivot_sparse, metric = 'cosine')
# Creating a dataframe with the cosine distance
recommender_df = pd.DataFrame(recommender, index = pivot.index, columns = pivot.index)

In [135]:
q = 'A Christmas Story'

titles = df[df['title'].str.contains(q)]['title']

titles_list = []
for title in titles: 
    if title not in titles_list:
        print(f'The top 10 most recommended movies for {title} are: {recommender_df[title].sort_values(ascending = True)[1:10]}')
    titles_list.append(title)

The top 10 most recommended movies for A Christmas Story are: title
Stalag 17                      0.000000
Titanic                        0.292893
The Magnificent Seven          0.292893
Blade                          0.292893
Gigi                           0.292893
Heaven Can Wait                0.292893
Casablanca                     0.292893
Around the World in 80 Days    0.292893
The Fugitive                   0.292893
Name: A Christmas Story, dtype: float64


In [136]:
q = 'Titanic'

titles = df[df['title'].str.contains(q)]['title']

titles_list = []
for title in titles: 
    if title not in titles_list:
        print(f'The top 10 most recommended movies for {title} are: {recommender_df[title].sort_values(ascending = True)[1:10]}')
    titles_list.append(title)

The top 10 most recommended movies for Titanic are: title
The Fugitive                   0.000000
Blade                          0.000000
The Magnificent Seven          0.000000
Casablanca                     0.000000
Titanic                        0.000000
Heaven Can Wait                0.000000
Around the World in 80 Days    0.000000
Iron Man                       0.000000
On the Waterfront              0.292893
Name: Titanic, dtype: float64


Basically the same results. They are still fairly off even though they have a 0 or very close to 0 cosine similarity. 

Trying Manhattan Distance with tomatometer rating instead

In [137]:
# Creating a pivot table
pivot = df.pivot_table(index = 'title', columns = 'critic_id', values = 'tomatometer_rating')
# Creating a sparse matrix
pivot_sparse = csr_matrix(pivot.fillna(0))
# Getting the cosine distance
recommender = pairwise_distances(pivot_sparse, metric = 'cityblock')
# Creating a dataframe with the cosine distance
recommender_df = pd.DataFrame(recommender, index = pivot.index, columns = pivot.index)

In [138]:
q = 'A Christmas Story'

titles = df[df['title'].str.contains(q)]['title']

titles_list = []
for title in titles: 
    if title not in titles_list:
        print(f'The top 10 most recommended movies for {title} are: {recommender_df[title].sort_values(ascending = True)[1:10]}')
    titles_list.append(title)

The top 10 most recommended movies for A Christmas Story are: title
Stalag 17                16.0
Titanic                  89.0
The Magnificent Seven    89.0
Ordinary People          89.0
Heaven Can Wait          90.0
Charade                  94.0
Iron Man                 94.0
The Fugitive             96.0
Gigi                     97.0
Name: A Christmas Story, dtype: float64


In [139]:
q = 'Titanic'

titles = df[df['title'].str.contains(q)]['title']

titles_list = []
for title in titles: 
    if title not in titles_list:
        print(f'The top 10 most recommended movies for {title} are: {recommender_df[title].sort_values(ascending = True)[1:10]}')
    titles_list.append(title)

The top 10 most recommended movies for Titanic are: title
The Magnificent Seven           0.0
Heaven Can Wait                 1.0
Iron Man                        5.0
Charade                         5.0
The Fugitive                    7.0
Casablanca                     10.0
Around the World in 80 Days    20.0
Blade                          33.0
Transylmania                   89.0
Name: Titanic, dtype: float64


Similar results & not performing well. Moving on to SVD. 

### Single Value Decomposition (SVD) Recommender System

The following code has been replicated from an example on Kaggle by user, cast42. [[1]](https://www.kaggle.com/cast42/simple-svd-movie-recommender)

Single Value Decomposition(SVD) of a matrix is a factorization of a matrix into three matrices. Add more info here. Here we will be looking up critic recs and then recommend similiar movies to ones that they have rated. 

In [140]:
# Setting copy 
data = df.copy()

# Setting data dataset
data = data[['rt_id','critic_id','review_score']]

# Setting movie dataset with movie id & title
movies = df[['rt_id','title']]

# Creating critic item df
critic_item = data.groupby(['critic_id','rt_id'])['review_score'].first().unstack(fill_value = 0.0)

critic_item.head(2)

rt_id,m/0814255,m/0878835,m/10,m/1000121-39_steps,m/10002516-lost_city,m/1000253-adams_rib,m/10002635-bridge_of_san_luis_rey,m/10003276-criminal,m/1000355-adventures_of_robin_hood,m/10004288-running_scared,m/10004504-ultraviolet,m/10004697-eternal,m/10004925-matador,m/10005178-saint_ralph,m/10005403-the_baxter,m/10005499-oliver_twist,m/1000560-alamo,m/10005755-g,m/1000581-alfie,m/1000617-aliens,m/1000626-all_about_eve,m/10006270,m/10006324,m/10006343-dirty,m/10006370-stay_alive,m/1000640-all_of_me,m/1000654-all_the_kings_men,m/10006848-the_motel,m/10006856-requiem,m/10007415-amazing_grace,m/10007598-cocaine_cowboys,m/10007714-situation,m/10007898-slipstream,m/10007902-delirious,m/10007916_interview,m/10007947-ten,m/10007985-happening,m/10008005-moliere,m/10008587-beverly_hills_chihuahua,m/10008601-fireflies_in_the_garden,m/10008606-eye,m/10008611-my_best_friends_girl,m/10008617-midnight_meat_train,m/10008618-return,m/10008621-run_fat_boy_run,m/10008655-whiteout,m/10008678-wild_child,m/10008703-rails_and_ties,m/10008744-canvas,m/10008754-flawless,m/10008760-what_happens_in_vegas,m/10008785-appaloosa,m/10008786-good,m/10008820-visitor,m/10008849-married_life,m/10008954-untraceable,m/10008991-informers,m/10009001-the_freshman,m/10009029-kenny,m/10009055-rocker,m/10009063-perfect_holiday,m/10009083-land_of_the_lost,m/10009132-rec,m/10009138-look,m/10009151-box,m/10009169-man_in_the_chair,m/10009192-21,m/10009212-ben_x,m/10009225-body_of_lies,m/10009244-the_grand,m/10009253-the_hammer,m/10009254-shutter,m/10009274-priest,m/10009323-broken,m/10009334-the_escapist,m/10009355-sugar,m/10009420-quid_pro_quo,m/10009421-red,m/10009460-the_road,m/10009462-g_force,m/10009469-the_other_man,m/10009493-duchess,m/10009516-women,m/10009526-public_enemies,m/10009552-julia,m/1000959-angel_heart,m/10009596-old_dogs,m/10009598-surrogates,m/10009624-promotion,m/10009632-notorious,m/10009635-soul_men,m/10009636-igor,m/10009890-stone_angel,m/10009926-alvin_and_the_chipmunks_2,m/10010066-push,m/10010077-felon,m/10010131-scar,m/10010150-unborn,m/10010208-chocolate,m/10010329-splinter,m/10010334-leaves_of_grass,m/10010458-proposal,m/10010552-gigantic,m/10010658-orphan,m/10010662-my_sisters_keeper,m/10010667-hangover,m/10010668-stepfather,m/10010694-season_of_the_witch,m/10010703-shuttle,m/10010760-miss_march,m/10010782-american_affair,m/10010785-grace,m/10010940-happy_tears,m/10011016-messenger,m/1001115-apartment,m/10011235-fame,m/10011262-outrage,m/10011284-skin,m/10011303-helen,m/10011388-blood_the_last_vampire,m/10011435-shrink,m/10011457-ong_bak_2_2009,m/10011774-chloe,m/10011815-hole,m/10011857-high_life,m/1001193-around_the_world_in_80_days,m/10011946-wonderful_world,m/10011949-collapse,m/10011984-last_song,m/10012039-lottery_ticket,m/10012042-going_the_distance,m/10012044-life_as_we_know_it,m/10012051-frozen,m/10012063-ghost_writer,m/10012068-repo_men,m/10012136-winters_bone,m/10012141-kids_are_all_right,m/10012207-stolen,m/10012256-predators,m/1001240-arthur,m/1001280-assault_on_precinct_13,m/1001567-bad_news_bears,m/1002352-big_sleep,m/1002448-birds,m/1002484-black_christmas,m/1002497-black_hole,m/1002512-black_rain,m/1002526-black_sunday,m/1002571-blind_date,m/1002930-born_yesterday,m/1002947-bounty,m/1002993-boys_from_brazil,m/1003075-brewsters_millions,m/1003094-brief_encounter,m/1003318-butch_cassidy_and_the_sundance_kid,m/1003492-cape_fear,m/1003612-carnival_of_souls,m/1003625-carrie,m/1003707-casablanca,m/1003722-casino_royale,m/1003757-cat_people,m/1003883-charade,m/1003952-charlottes_web,m/1004047-children_of_the_corn,m/1004151-christmas_story,m/1004177-cimarron,m/1004330-cleopatra,m/1004485-colors,m/1004567-commando,m/1004906-creature_from_the_black_lagoon,m/1005199-dangerous_liaisons,m/1005360-day_of_the_dead,m/1005371-day_the_earth_stood_still,m/1006037-doctor_zhivago,m/1006207-dr_jekyll_and_mr_hyde,...,m/where_is_kyra,m/where_the_buffalo_roam,m/where_the_money_is,m/where_the_truth_lies,m/where_the_wild_things_are,m/while_were_young,m/while_you_were_sleeping,m/whip_it,m/whiplash_2014,m/whipped,m/whiskey_tango_foxtrot,m/white_bird_in_a_blizzard,m/white_boy_rick,m/white_chicks,m/white_countess,m/white_heat,m/white_house_down,m/white_hunter_black_heart,m/white_material,m/white_men_cant_jump,m/white_noise,m/white_oleander,m/white_squall,m/who_framed_roger_rabbit,m/whole_nine_yards,m/whole_ten_yards,m/whos_afraid_of_virginia_woolf,m/whos_your_caddy,m/why_did_i_get_married,m/why_him,m/wicker_park,m/widows_2018,m/wiener_dog,m/wild_2014,m/wild_america,m/wild_bill,m/wild_hogs,m/wild_target,m/wild_things,m/wild_thornberrys_movie,m/wild_wild_west,m/wilde,m/wildlife_2018,m/wildling,m/william_shakespeares_romeo_and_juliet,m/willow,m/willow_creek,m/wilson_2017,m/wimbledon,m/win_it_all,m/win_win_2011,m/winchester,m/wind_chill,m/wind_river_2017,m/windtalkers,m/wine_country,m/wing_commander,m/wings,m/wings_of_the_dove,m/winnie_the_pooh_2011,m/winter_passing,m/winter_solstice,m/winters_tale_2014,m/wise_blood,m/wish_i_was_here,m/wish_upon,m/wish_you_were_here_2012,m/wishmaster,m/witches_of_eastwick,m/with_honors,m/without_a_paddle,m/without_limits,m/witless_protection,m/wiz,m/wizards,m/wolf_creek,m/wolf_creek_2,m/wolf_man,m/wolfen,m/woman_in_gold,m/woman_of_the_year,m/woman_on_top,m/woman_thou_art_loosed,m/woman_under_the_influence,m/woman_walks_ahead,m/women_in_trouble,m/wonder,m/wonder_boys,m/wonder_park,m/wonder_wheel,m/wonder_woman_2017,m/wonderland,m/wonderstruck,m/wont_back_down_2012,m/wood,m/woodshock_2017,m/woodsman,m/working_girl,m/working_man,m/world-war-z,m/world_is_not_enough,m/world_trade_center,m/worlds_fastest_indian,m/would_you_rather,m/wounds_2019,m/wrestling_ernest_hemingway,m/wristcutters_a_love_story,m/wrong_2011,m/wrong_turn,m/wrongfully_accused,m/x_men_3_the_last_stand,m/x_men_apocalypse,m/x_men_days_of_future_past,m/xanadu,m/xmen,m/xx,m/xx_xy,m/xxx_return_of_xander_cage,m/xxx_state_of_the_union,m/xxy,m/yankee_doodle_dandy,m/yards,m/year_of_living_dangerously,m/year_of_the_dog,m/year_of_the_fish,m/year_one,m/yella,m/yes_god_yes,m/yes_man,m/yoga_hosers,m/yogi_bear_2010,m/yonkers_joe,m/yossi_2013,m/you_again,m/you_got_served,m/you_me_and_dupree,m/you_only_live_twice,m/you_should_have_left,m/you_were_never_really_here,m/you_will_meet_a_tall_dark_stranger,m/young_adult,m/young_black_stallion,m/young_einstein,m/young_frankenstein,m/young_guns,m/young_ones_2014,m/young_sherlock_holmes,m/young_victoria,m/your_highness,m/your_sisters_sister,m/youre_next_2011,m/youre_not_you,m/yours_mine_and_ours,m/youth_2015,m/youth_in_revolt,m/youth_without_youth,m/youve_got_mail,m/z,m/zardoz,m/zathura,m/zaytoun,m/zero_charisma,m/zero_dark_thirty,m/zero_effect,m/zerophilia,m/zipper,m/zodiac,m/zombeavers,m/zombieland,m/zookeeper,m/zoolander,m/zoolander_2,m/zoom_2006,m/zootopia,m/zulu
critic_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [141]:
# Top movie ratings from critic 761
critic_item.iloc[761].sort_values(ascending=False).head(10)

rt_id
m/skyfall                   100.0
m/the_amazing_spider_man     80.0
m/good_time                  80.0
m/world-war-z                60.0
m/the_pyramid_2014           40.0
m/the_beguiled               40.0
m/0814255                     0.0
m/piglets_big_movie           0.0
m/pieta_2013                  0.0
m/piercing                    0.0
Name: 5470, dtype: float64

In [142]:
# Setting up svd model with top 50
U, sigma, Vt = svds(critic_item, k = 50)

In [143]:
# Setting up sigma matrix 
sigma_diag_matrix = np.diag(sigma)

In [144]:
# Getting dot product of both arrays
all_user_predicted_ratings = np.dot(np.dot(U, sigma_diag_matrix), Vt)

# Creating dataframe
preds_df = pd.DataFrame(all_user_predicted_ratings, columns = critic_item.columns, index=critic_item.index)

print(preds_df.shape)
preds_df.head(2)

(1423, 6587)


rt_id,m/0814255,m/0878835,m/10,m/1000121-39_steps,m/10002516-lost_city,m/1000253-adams_rib,m/10002635-bridge_of_san_luis_rey,m/10003276-criminal,m/1000355-adventures_of_robin_hood,m/10004288-running_scared,m/10004504-ultraviolet,m/10004697-eternal,m/10004925-matador,m/10005178-saint_ralph,m/10005403-the_baxter,m/10005499-oliver_twist,m/1000560-alamo,m/10005755-g,m/1000581-alfie,m/1000617-aliens,m/1000626-all_about_eve,m/10006270,m/10006324,m/10006343-dirty,m/10006370-stay_alive,m/1000640-all_of_me,m/1000654-all_the_kings_men,m/10006848-the_motel,m/10006856-requiem,m/10007415-amazing_grace,m/10007598-cocaine_cowboys,m/10007714-situation,m/10007898-slipstream,m/10007902-delirious,m/10007916_interview,m/10007947-ten,m/10007985-happening,m/10008005-moliere,m/10008587-beverly_hills_chihuahua,m/10008601-fireflies_in_the_garden,m/10008606-eye,m/10008611-my_best_friends_girl,m/10008617-midnight_meat_train,m/10008618-return,m/10008621-run_fat_boy_run,m/10008655-whiteout,m/10008678-wild_child,m/10008703-rails_and_ties,m/10008744-canvas,m/10008754-flawless,m/10008760-what_happens_in_vegas,m/10008785-appaloosa,m/10008786-good,m/10008820-visitor,m/10008849-married_life,m/10008954-untraceable,m/10008991-informers,m/10009001-the_freshman,m/10009029-kenny,m/10009055-rocker,m/10009063-perfect_holiday,m/10009083-land_of_the_lost,m/10009132-rec,m/10009138-look,m/10009151-box,m/10009169-man_in_the_chair,m/10009192-21,m/10009212-ben_x,m/10009225-body_of_lies,m/10009244-the_grand,m/10009253-the_hammer,m/10009254-shutter,m/10009274-priest,m/10009323-broken,m/10009334-the_escapist,m/10009355-sugar,m/10009420-quid_pro_quo,m/10009421-red,m/10009460-the_road,m/10009462-g_force,m/10009469-the_other_man,m/10009493-duchess,m/10009516-women,m/10009526-public_enemies,m/10009552-julia,m/1000959-angel_heart,m/10009596-old_dogs,m/10009598-surrogates,m/10009624-promotion,m/10009632-notorious,m/10009635-soul_men,m/10009636-igor,m/10009890-stone_angel,m/10009926-alvin_and_the_chipmunks_2,m/10010066-push,m/10010077-felon,m/10010131-scar,m/10010150-unborn,m/10010208-chocolate,m/10010329-splinter,m/10010334-leaves_of_grass,m/10010458-proposal,m/10010552-gigantic,m/10010658-orphan,m/10010662-my_sisters_keeper,m/10010667-hangover,m/10010668-stepfather,m/10010694-season_of_the_witch,m/10010703-shuttle,m/10010760-miss_march,m/10010782-american_affair,m/10010785-grace,m/10010940-happy_tears,m/10011016-messenger,m/1001115-apartment,m/10011235-fame,m/10011262-outrage,m/10011284-skin,m/10011303-helen,m/10011388-blood_the_last_vampire,m/10011435-shrink,m/10011457-ong_bak_2_2009,m/10011774-chloe,m/10011815-hole,m/10011857-high_life,m/1001193-around_the_world_in_80_days,m/10011946-wonderful_world,m/10011949-collapse,m/10011984-last_song,m/10012039-lottery_ticket,m/10012042-going_the_distance,m/10012044-life_as_we_know_it,m/10012051-frozen,m/10012063-ghost_writer,m/10012068-repo_men,m/10012136-winters_bone,m/10012141-kids_are_all_right,m/10012207-stolen,m/10012256-predators,m/1001240-arthur,m/1001280-assault_on_precinct_13,m/1001567-bad_news_bears,m/1002352-big_sleep,m/1002448-birds,m/1002484-black_christmas,m/1002497-black_hole,m/1002512-black_rain,m/1002526-black_sunday,m/1002571-blind_date,m/1002930-born_yesterday,m/1002947-bounty,m/1002993-boys_from_brazil,m/1003075-brewsters_millions,m/1003094-brief_encounter,m/1003318-butch_cassidy_and_the_sundance_kid,m/1003492-cape_fear,m/1003612-carnival_of_souls,m/1003625-carrie,m/1003707-casablanca,m/1003722-casino_royale,m/1003757-cat_people,m/1003883-charade,m/1003952-charlottes_web,m/1004047-children_of_the_corn,m/1004151-christmas_story,m/1004177-cimarron,m/1004330-cleopatra,m/1004485-colors,m/1004567-commando,m/1004906-creature_from_the_black_lagoon,m/1005199-dangerous_liaisons,m/1005360-day_of_the_dead,m/1005371-day_the_earth_stood_still,m/1006037-doctor_zhivago,m/1006207-dr_jekyll_and_mr_hyde,...,m/where_is_kyra,m/where_the_buffalo_roam,m/where_the_money_is,m/where_the_truth_lies,m/where_the_wild_things_are,m/while_were_young,m/while_you_were_sleeping,m/whip_it,m/whiplash_2014,m/whipped,m/whiskey_tango_foxtrot,m/white_bird_in_a_blizzard,m/white_boy_rick,m/white_chicks,m/white_countess,m/white_heat,m/white_house_down,m/white_hunter_black_heart,m/white_material,m/white_men_cant_jump,m/white_noise,m/white_oleander,m/white_squall,m/who_framed_roger_rabbit,m/whole_nine_yards,m/whole_ten_yards,m/whos_afraid_of_virginia_woolf,m/whos_your_caddy,m/why_did_i_get_married,m/why_him,m/wicker_park,m/widows_2018,m/wiener_dog,m/wild_2014,m/wild_america,m/wild_bill,m/wild_hogs,m/wild_target,m/wild_things,m/wild_thornberrys_movie,m/wild_wild_west,m/wilde,m/wildlife_2018,m/wildling,m/william_shakespeares_romeo_and_juliet,m/willow,m/willow_creek,m/wilson_2017,m/wimbledon,m/win_it_all,m/win_win_2011,m/winchester,m/wind_chill,m/wind_river_2017,m/windtalkers,m/wine_country,m/wing_commander,m/wings,m/wings_of_the_dove,m/winnie_the_pooh_2011,m/winter_passing,m/winter_solstice,m/winters_tale_2014,m/wise_blood,m/wish_i_was_here,m/wish_upon,m/wish_you_were_here_2012,m/wishmaster,m/witches_of_eastwick,m/with_honors,m/without_a_paddle,m/without_limits,m/witless_protection,m/wiz,m/wizards,m/wolf_creek,m/wolf_creek_2,m/wolf_man,m/wolfen,m/woman_in_gold,m/woman_of_the_year,m/woman_on_top,m/woman_thou_art_loosed,m/woman_under_the_influence,m/woman_walks_ahead,m/women_in_trouble,m/wonder,m/wonder_boys,m/wonder_park,m/wonder_wheel,m/wonder_woman_2017,m/wonderland,m/wonderstruck,m/wont_back_down_2012,m/wood,m/woodshock_2017,m/woodsman,m/working_girl,m/working_man,m/world-war-z,m/world_is_not_enough,m/world_trade_center,m/worlds_fastest_indian,m/would_you_rather,m/wounds_2019,m/wrestling_ernest_hemingway,m/wristcutters_a_love_story,m/wrong_2011,m/wrong_turn,m/wrongfully_accused,m/x_men_3_the_last_stand,m/x_men_apocalypse,m/x_men_days_of_future_past,m/xanadu,m/xmen,m/xx,m/xx_xy,m/xxx_return_of_xander_cage,m/xxx_state_of_the_union,m/xxy,m/yankee_doodle_dandy,m/yards,m/year_of_living_dangerously,m/year_of_the_dog,m/year_of_the_fish,m/year_one,m/yella,m/yes_god_yes,m/yes_man,m/yoga_hosers,m/yogi_bear_2010,m/yonkers_joe,m/yossi_2013,m/you_again,m/you_got_served,m/you_me_and_dupree,m/you_only_live_twice,m/you_should_have_left,m/you_were_never_really_here,m/you_will_meet_a_tall_dark_stranger,m/young_adult,m/young_black_stallion,m/young_einstein,m/young_frankenstein,m/young_guns,m/young_ones_2014,m/young_sherlock_holmes,m/young_victoria,m/your_highness,m/your_sisters_sister,m/youre_next_2011,m/youre_not_you,m/yours_mine_and_ours,m/youth_2015,m/youth_in_revolt,m/youth_without_youth,m/youve_got_mail,m/z,m/zardoz,m/zathura,m/zaytoun,m/zero_charisma,m/zero_dark_thirty,m/zero_effect,m/zerophilia,m/zipper,m/zodiac,m/zombeavers,m/zombieland,m/zookeeper,m/zoolander,m/zoolander_2,m/zoom_2006,m/zootopia,m/zulu
critic_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1
1,-0.000864,0.07205,-0.002165,-0.000146,-0.000882,-0.032413,-0.000223,7.6e-05,-0.015708,-0.000398,-6.925325e-06,0.000589,0.069981,-0.000415,-0.001807,0.011593,0.003901,-2.307438e-05,0.015587,0.007485,-0.018792,0.001674,0.001466,-2.307438e-05,-0.000506,-0.001859,-0.001325,-0.015381,-0.018829,-0.000299,0.009659,-0.00128,-0.031231,-0.002023,-0.000423,-0.029097,0.024062,-0.000209,0.000201,4.3e-05,2.9e-05,-0.003481,-0.006247,0.010644,-0.019896,-0.001757,-0.001776,-0.027826,0.002384,-0.031231,0.004402,0.002995,-0.015646,-0.024618,-0.002099,-0.009619,-0.000758,-0.020949,-4.7e-05,-0.002356,0.045987,-0.011115,0.000106,-0.039594,-4.7e-05,-0.036901,-0.022373,0.000442,-0.008259,-0.032751,0.017022,0.001239,0.000219,0.036455,-0.033183,0.000243,0.006374,-0.00177,0.002364,0.000177,-0.020087,-0.012194,0.004121,-0.011184,-0.00057,0.001096,0.013218,-0.019998,-0.030967,-0.000842,0.024946,0.006504,1.4992730000000003e-17,0.001242,0.077974,0.005254,-1e-05,-0.010242,-0.002416,-0.003971,-0.023431,-0.003708,-0.01884,0.015351,-0.010192,0.020388,0.000939,-0.001007,-0.001072,0.007285,-0.03152,-0.002275,-0.001216,-0.000601,-0.024449,-0.001476,-0.002917,0.009758,0.006505,-0.026786,-0.013396,-0.004036,-0.010475,-0.002221,-5.3253790000000004e-17,0.005393,0.001031,-0.030137,0.00024,0.035115,8.738337e-05,-0.008379,0.001551,0.009435,0.011001,0.009168,0.037979,0.008185,0.002511,-0.002275,0.002578,0.005142,-0.022644,0.010094,-0.002023,0.003711,-0.001093,0.010933,0.00053,0.00165,-0.008775,0.004891,-0.001666,0.040531,-0.041783,0.055278,-0.007235,-0.002805,0.00719,-0.001011,0.025979,0.006291,0.002946,-0.001561,0.001775,-0.002338,-0.018818,-0.000493,0.003467,0.022324,0.00395,-0.001452,-0.004575,0.001268,-0.00568,...,2.203371,0.003009,0.004562,-0.010344,-0.015916,0.070048,0.007418,0.033607,-0.011569,0.003254,-0.001633,-0.16991,1.657643,0.000446,0.034878,5e-06,-4e-06,0.003582,0.00025,8.4e-05,-0.000155,0.000204,-0.004678,-0.014187,0.004571,-0.015634,-0.010189,-1.8e-05,-0.000323,0.015477,0.021322,0.043049,0.039243,0.002844,0.000752,-0.005346,0.010587,0.000252,-0.038592,0.000253,-0.028448,-0.002868,0.248446,0.058551,-0.0007,-0.000427,-0.029232,0.000114,-0.002998,0.069796,0.050535,7e-06,-0.000716,0.13716,0.002866,0.000159,-0.00012,-0.002092,-0.003461,0.02007,-0.024279,-0.014849,0.093235,-0.002787,0.000137,0.019799,0.046173,-4.5e-05,0.000503,0.00053,0.003473,0.004665,-9.695456e-06,0.000168,0.027997,-0.003366,0.000797,-0.008673,-0.005533,0.026353,0.011745,-0.000219,-0.036901,0.003057,0.789556,0.072406,0.039948,-0.01123,-0.003566,2.713674,0.062579,0.010967,0.007221,-0.02992,-0.002245,0.052761,-0.001757,0.00087,0.024523,0.00063,-0.005901,-0.036616,-0.00035,-4e-06,0.001324,-0.002286,0.016157,0.003662,-0.002463,0.000671,0.006327,0.040602,0.04714,-0.000301,0.00011,-0.000661,0.012804,-6.043833e-18,-0.005951,0.069668,-0.001033,0.00032,-0.013188,-0.027398,0.000265,-0.011678,0.095433,-0.002868,-0.008848,0.006845,0.000412,-0.000578,0.001002,0.000677,0.000286,-0.00523,-0.018987,-0.023904,2.287038,0.041935,-0.026256,8.924421e-06,-0.002531,-0.008257,-0.0019,0.02708,0.004629,-1.3e-05,0.001206,-0.031117,-0.029278,0.018712,8e-06,0.009812,0.017661,-0.00445,0.016751,0.011821,0.017267,0.005083,0.013872,-0.26336,-0.030098,-0.00218,-0.020707,-0.107063,-0.018103,0.151061,-0.011441,0.000458,0.01066,-0.000134,-0.000211,-0.001066,-0.005512
3,0.001247,0.004778,-0.004651,-8e-06,-0.000213,0.017829,-0.000607,-0.002685,-0.011204,-0.000126,3.160081e-08,-0.005495,-0.003319,4.9e-05,0.00301,0.000188,0.006136,-4.471324e-08,-0.001046,0.043464,-0.002439,-0.001162,-0.008361,-4.471324e-08,-0.015151,-0.010435,-0.003811,0.001009,0.001225,-4.6e-05,0.007468,-0.000428,0.000838,-0.008213,5.4e-05,-0.006453,0.000718,2.2e-05,6e-06,-7.8e-05,-0.061155,0.003294,-0.009274,0.005388,-0.004609,-0.005638,0.002119,0.001162,-0.013652,0.000838,0.001372,0.009263,0.001041,-0.006742,0.002786,-0.001621,-0.00308,0.002961,0.000103,-0.007608,-0.002336,-0.001848,0.022174,-0.004361,2e-06,-0.014347,-0.004317,0.02457,-0.002775,-0.012709,2.6e-05,-0.001162,-1.4e-05,-0.016067,0.000891,-0.000104,-0.004777,-0.007186,-0.000173,5e-05,0.002536,-0.001172,0.001219,-0.005871,-0.000108,-0.020575,-0.000232,-8.3e-05,-0.004725,0.001196,-0.010438,-0.001329,1.2807240000000001e-17,-9e-05,0.010246,0.005168,0.00019,-0.00088,-0.001601,-3.4e-05,0.001544,-0.002458,0.001226,-0.000679,-0.003264,1.1e-05,-0.00088,0.001048,-6.9e-05,-0.000442,-0.004992,-0.009239,-0.00085,-0.002145,-0.009879,-0.004729,-0.009346,-2.9e-05,-1.9e-05,-0.004302,0.001884,-0.002331,0.011588,0.002242,5.396685e-18,0.002158,0.001342,0.006414,-0.000289,0.000315,2.622219e-07,-0.001319,-0.000174,-0.014437,0.002575,-0.001267,-0.003739,0.00027,0.016941,-0.009239,-0.030508,-0.005438,-0.001796,0.025997,-0.008213,0.008917,-0.014425,-0.006977,-0.010332,-0.00338,0.017909,-0.046006,0.001681,0.00539,0.031763,0.006288,-0.140635,0.004927,0.002878,-0.004106,-0.001744,0.002518,-0.034866,-0.006249,-0.02137,0.004106,0.003074,-0.013986,-0.011343,-0.001652,-0.051622,-0.013385,1.170451,-0.012373,0.01141,...,-0.000393,-0.00028,-0.003313,-0.00483,-0.004089,0.000863,-0.025497,-0.002794,-0.000264,-0.004689,-1.6e-05,-0.000802,-2e-05,-0.069248,-0.028498,-0.006509,-1.4e-05,-0.047264,-0.000106,-0.014467,-0.016468,-0.000602,-0.0226,-0.00936,-0.005439,0.002209,-0.086808,0.001021,-6.1e-05,-0.000822,0.000827,-0.000197,-0.099985,-0.002338,0.004893,0.002455,0.003308,1e-05,-0.010707,-0.001214,-0.050242,-0.03063,0.000152,-0.000277,-0.000869,-0.014983,-0.001592,-0.000965,-0.05896,-0.009672,0.014183,6.4e-05,-0.000606,-0.007673,-0.013154,-0.000118,-0.000581,0.003261,-0.015498,-0.002199,0.003863,-0.001834,0.012929,-0.00022,-0.000246,-4.5e-05,-0.009425,4.9e-05,-0.018275,-0.010332,-0.001818,-0.032361,4.424114e-08,-0.026261,0.000845,-0.011651,0.000247,1.216798,0.707893,-0.00027,-0.00052,-0.000336,-0.014347,-0.04268,0.000736,0.004935,0.000263,-0.020847,0.006277,-0.000507,-0.000888,0.003491,-2.4e-05,0.000782,-0.007336,-0.000528,-0.00563,-0.016722,-0.001381,0.000171,0.00147,-0.000913,-0.000324,6e-06,1.2e-05,-0.007266,-0.008413,-0.000685,-0.005439,-0.001945,0.001966,-0.00153,-0.00037,0.007073,2.3e-05,1.9e-05,-0.006152,3.841325e-18,-0.007603,-0.014225,-0.019177,-0.007951,0.015235,-0.007532,-0.00017,-0.001859,-0.013292,3.5e-05,-0.002836,-0.046462,2.8e-05,-0.000145,-0.000208,0.000148,-3.7e-05,-0.000213,0.005128,0.000323,-0.001563,0.00119,0.001731,5.914629e-07,-0.053983,0.02677,-0.00113,-0.002568,0.001546,0.00024,0.0015,-0.000127,-0.00763,-0.001074,1.8e-05,0.002903,-0.007525,-0.001449,-0.00797,0.018594,-0.015636,-0.000333,-0.001493,-0.001627,-0.005922,-0.009332,-0.01736,-0.000198,-0.005689,-0.008488,-0.00442,0.000118,-0.015416,7.7e-05,2.2e-05,1.4e-05,1.410181


In [145]:
preds_df.index.value_counts()

1        1
7076     1
6939     1
6926     1
6906     1
        ..
3465     1
3460     1
3459     1
3454     1
10098    1
Name: critic_id, Length: 1423, dtype: int64

In [146]:
# Top 10 recommended movies by critic 3454
critic_item.loc[3454].sort_values(ascending=False).head(10)

rt_id
m/three_kings                     100.0
m/toy_story_2                     100.0
m/slap_shot                       100.0
m/1015380-odd_couple              100.0
m/bringing_out_the_dead            91.0
m/tampopo                          80.0
m/analyze_this                     58.0
m/universal_soldier_the_return     25.0
m/battlefield_earth                12.5
m/pieta_2013                        0.0
Name: 3454, dtype: float64

In [148]:
# Setting all critic recs to series
movies_critic_3454 = critic_item.loc[3454]

In [149]:
# Recommended by critic with over 25% 
high_rated_movies_3454 = movies_critic_3454[movies_critic_3454 > 25].index

high_rated_movies_3454

Index(['m/1015380-odd_couple', 'm/analyze_this', 'm/bringing_out_the_dead',
       'm/slap_shot', 'm/tampopo', 'm/three_kings', 'm/toy_story_2'],
      dtype='object', name='rt_id')

In [151]:
# Getting movie recommendations for critic 3454
movies_recommended_for_3454 = preds_df.loc[3454]

# Getting recs for over 10%
movies_high_recommend_for_3454 = movies_recommended_for_3454[movies_recommended_for_3454 > 25].index

In [152]:
# None of the recs for this critic have been rated by this person yet
movies_high_recommend_for_3454

Index([], dtype='object', name='rt_id')

In [153]:
# Seeing what movies have a recommendation of 25% or higher that have no rating yet
recs_3454 = (set(movies_high_recommend_for_3454) - set(high_rated_movies_3454))
list(recs_3454)[:10]

[]

In [158]:
# Getting top recs for critic ids 

def get_high_recommended_movies(userId):
    movies_rated_by_user = critic_item.iloc[userId]
    movies_high_rated_by_user =  movies_rated_by_user[movies_rated_by_user > 3].index
    movies_recommended_for_user = preds_df.iloc[userId]
    movies_high_recommend_for_user = movies_recommended_for_user[movies_recommended_for_user > 3].index
    return set(movies_high_recommend_for_user) - set(movies_high_rated_by_user)

In [159]:
# No recs for this critic
list(get_high_recommended_movies(761))[:10]

[]

In [160]:
# No recs for this critic
list(get_high_recommended_movies(84))[:10]

[]

In [161]:
# Critic recs for critic 0 
list(get_high_recommended_movies(0))[:10]

['m/moonlight_2016',
 'm/short_term_12_2013',
 'm/menashe',
 'm/12_years_a_slave',
 'm/dont_think_twice',
 'm/we_the_animals']

While this method looks to be successful, I want to test out KNN before evaluating further. 

#### KNN 

The following code has been adapted from Susan Li's post on Towards Data Science [[1]](https://towardsdatascience.com/how-did-we-build-book-recommender-systems-in-an-hour-part-2-k-nearest-neighbors-and-matrix-c04b3c2ef55c)

In [168]:
data = df.groupby('title').head(1)

In [169]:
# Creating pivot
pivot = data.pivot(index = 'title', columns = 'critic_id', values = 'score').fillna(0)
# Creating matrix 
matrix = csr_matrix(pivot.values)

# Instantiating Model
knn = NearestNeighbors(metric = 'cosine', algorithm = 'brute')
# Fitting model
knn.fit(matrix)

NearestNeighbors(algorithm='brute', metric='cosine')

In [170]:
# Testing Model
query_idx = np.random.choice(pivot.shape[0])
query_idx

3965

In [171]:
dist, idx = knn.kneighbors(pivot.iloc[query_idx, :].values.reshape(1,-1), n_neighbors = 5)

In [172]:
for i in range(0, len(dist.flatten())):
    if i == 0: 
        print('Recommendations for {0}:\n'.format(pivot.index[query_idx]))
    else:
        print('{0}:{1}, with distances of {2}:'.format(i, pivot.index[idx.flatten()[i]], dist.flatten()[i]))

Recommendations for RocknRolla:

1:St. Trinian's, with distances of 0.0:
2:The Bounty Hunter, with distances of 0.0:
3:6 Souls, with distances of 0.0:
4:Bug, with distances of 0.0:


In [173]:
# Trying again 
query_idx = np.random.choice(pivot.shape[0])
query_idx

6042

In [174]:
dist, idx = knn.kneighbors(pivot.iloc[query_idx, :].values.reshape(1,-1), n_neighbors = 5)

In [175]:
for i in range(0, len(dist.flatten())):
    if i == 0: 
        print('Recommendations for {0}:\n'.format(pivot.index[query_idx]))
    else:
        print('{0}:{1}, with distances of {2}:'.format(i, pivot.index[idx.flatten()[i]], dist.flatten()[i]))

Recommendations for To Die For:

1:Don't Look Now, with distances of 0.0:
2:Re-Animator, with distances of 0.0:
3:Green Zone, with distances of 0.0:
4:The Insider, with distances of 0.0:


While this method looks to be successful, I want to focus on the columns with string values(objects) for my recommender system. I am going to try spaCy on Google Colab. Please go to 05.2_Recommender_System_spaCy.ipynb To view these models. 