In [1]:
import pandas as pd
import numpy as np
import pandas_profiling
import random
import os
import itertools

# display, plots
from IPython.display import display_html
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import seaborn as sns

# recommender systems
from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import auc_score, precision_at_k, recall_at_k, reciprocal_rank
from lightfm import LightFM
from scipy.sparse import csr_matrix
from sklearn.metrics import mean_squared_error

# warning
import warnings
warnings.filterwarnings("ignore")



In [2]:
pd.set_option("max_columns", 100) 
pd.set_option("max_rows", 100)

In [3]:
# import the data
tags = pd.read_csv('ml-25m/genome-tags.csv')
movies = pd.read_csv('ml-25m/movies.csv')
user_tags = pd.read_csv('ml-25m/tags.csv')
tag_scores = pd.read_csv('ml-25m/genome-scores.csv')
ratings = pd.read_csv('ml-25m/ratings.csv')
links = pd.read_csv('ml-25m/links.csv')

In [4]:
# merge the movies and its ratings from different users
movies_and_ratings = pd.merge(movies, ratings, on='movieId')
movies_and_ratings.head(3)

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,2,3.5,1141415820
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,3,4.0,1439472215
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,4,3.0,1573944252


## Movie Metadata Part One

In [5]:
# merge the tags and tag scores, then pivot the table and merge the new dataframe
# with the movies_and_ratings to generate a new dataframe that rows represent all the movies
# and columns represent all the tags
tags_and_tag_scores = pd.merge(tags,tag_scores, on='tagId')
tats_ = tags_and_tag_scores.pivot(index='movieId', columns='tag', values='relevance')
tats = pd.merge(tats_,movies_and_ratings[['title','movieId']].drop_duplicates(),on='movieId')
col = tats.pop('title')
tats.insert(0, col.name, col)
tats.head(3)

Unnamed: 0,title,movieId,007,007 (series),18th century,1920s,1930s,1950s,1960s,1970s,1980s,19th century,3d,70mm,80s,9/11,aardman,aardman studios,abortion,absurd,action,action packed,adaptation,adapted from:book,adapted from:comic,adapted from:game,addiction,adolescence,adoption,adultery,adventure,affectionate,afi 100,afi 100 (laughs),afi 100 (movie quotes),africa,afterlife,aging,aids,airplane,airport,alaska,alcatraz,alcoholism,alien,alien invasion,aliens,allegory,almodovar,alone in the world,...,vietnam,vietnam war,view askew,vigilante,vigilantism,violence,violent,virginity,virtual reality,virus,visceral,visual,visually appealing,visually stunning,visuals,voodoo,voyeurism,war,war movie,wartime,waste of time,watch the credits,weapons,wedding,weed,weird,werewolf,werewolves,western,whimsical,wilderness,wine,wistful,witch,witches,witty,wizards,women,working class,workplace,world politics,world war i,world war ii,writer's life,writers,writing,wuxia,wwii,zombie,zombies
0,Toy Story (1995),1,0.02875,0.02375,0.0625,0.07575,0.14075,0.14675,0.0635,0.20375,0.202,0.03075,0.58025,0.1025,0.20175,0.007,0.0245,0.17275,0.0165,0.104,0.6625,0.30075,0.31675,0.286,0.06275,0.01925,0.05875,0.07925,0.1965,0.09325,0.89375,0.67625,0.03575,0.23325,0.3075,0.0545,0.038,0.212,0.08,0.0155,0.0195,0.01925,0.015,0.03,0.09075,0.0225,0.246,0.246,0.0105,0.16675,...,0.0115,0.00525,0.02375,0.06125,0.06325,0.23375,0.173,0.02925,0.0265,0.0215,0.1515,0.56375,0.315,0.67325,0.4365,0.0155,0.0705,0.0655,0.05225,0.12775,0.004,0.0975,0.26375,0.02175,0.0225,0.427,0.07175,0.014,0.0355,0.587,0.08775,0.03775,0.0715,0.03325,0.1055,0.694,0.108,0.08925,0.055,0.077,0.0405,0.01425,0.0305,0.035,0.14125,0.05775,0.039,0.02975,0.08475,0.022
1,Jumanji (1995),2,0.04125,0.0405,0.06275,0.08275,0.091,0.06125,0.06925,0.096,0.0765,0.0525,0.088,0.10275,0.14525,0.007,0.0045,0.0275,0.01075,0.15925,0.64025,0.36725,0.5145,0.4845,0.19725,0.24875,0.08325,0.157,0.1455,0.05825,0.976,0.12675,0.0105,0.0735,0.07075,0.069,0.10975,0.29775,0.062,0.02,0.0275,0.01725,0.01625,0.02,0.07225,0.08,0.08725,0.1775,0.00625,0.3375,...,0.00975,0.0025,0.02725,0.04175,0.0415,0.18475,0.113,0.02925,0.06625,0.056,0.07325,0.3815,0.225,0.217,0.31525,0.02225,0.05375,0.03775,0.03075,0.058,0.00475,0.0375,0.178,0.016,0.02075,0.3865,0.0965,0.02425,0.013,0.2925,0.09375,0.04975,0.02225,0.243,0.38525,0.18725,0.217,0.13525,0.018,0.08725,0.0525,0.01575,0.0125,0.02,0.12225,0.03275,0.021,0.011,0.10525,0.01975
2,Grumpier Old Men (1995),3,0.04675,0.0555,0.02925,0.087,0.0475,0.04775,0.046,0.14275,0.0285,0.03875,0.0245,0.06475,0.081,0.0055,0.005,0.03475,0.00675,0.11375,0.16025,0.14075,0.252,0.19375,0.10425,0.01675,0.04875,0.0645,0.13175,0.08625,0.3215,0.0955,0.012,0.03575,0.05975,0.06625,0.053,0.28575,0.05275,0.029,0.0365,0.0175,0.02,0.01325,0.02275,0.0235,0.03525,0.10775,0.00575,0.0205,...,0.00825,0.003,0.0485,0.08525,0.06525,0.1345,0.09825,0.04975,0.01775,0.02375,0.10175,0.10725,0.196,0.0985,0.0495,0.021,0.0625,0.035,0.0315,0.076,0.02,0.04725,0.14125,0.42725,0.03525,0.2435,0.061,0.02,0.038,0.13025,0.13325,0.0395,0.025,0.0195,0.0295,0.22325,0.0265,0.35075,0.0215,0.07325,0.06275,0.0195,0.02225,0.023,0.122,0.03475,0.017,0.018,0.091,0.01775


In [6]:
# for meaningful pandas_profiling statistics, replace blank cells with NaN
tats.replace('', np.nan, inplace=True)
profile = pandas_profiling.ProfileReport(tats[['animals', 'animated', 'anti-hero', 'based on a video game']])
profile.to_notebook_iframe()

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

In [7]:
# feature transformation: rounding to 1st decimal place
tats.replace(np.nan, 0, inplace=True)
tats[tats.columns[2:]] = tats[tats.columns[2:]].apply(lambda x: round(x, 1))
tats.head(3)

Unnamed: 0,title,movieId,007,007 (series),18th century,1920s,1930s,1950s,1960s,1970s,1980s,19th century,3d,70mm,80s,9/11,aardman,aardman studios,abortion,absurd,action,action packed,adaptation,adapted from:book,adapted from:comic,adapted from:game,addiction,adolescence,adoption,adultery,adventure,affectionate,afi 100,afi 100 (laughs),afi 100 (movie quotes),africa,afterlife,aging,aids,airplane,airport,alaska,alcatraz,alcoholism,alien,alien invasion,aliens,allegory,almodovar,alone in the world,...,vietnam,vietnam war,view askew,vigilante,vigilantism,violence,violent,virginity,virtual reality,virus,visceral,visual,visually appealing,visually stunning,visuals,voodoo,voyeurism,war,war movie,wartime,waste of time,watch the credits,weapons,wedding,weed,weird,werewolf,werewolves,western,whimsical,wilderness,wine,wistful,witch,witches,witty,wizards,women,working class,workplace,world politics,world war i,world war ii,writer's life,writers,writing,wuxia,wwii,zombie,zombies
0,Toy Story (1995),1,0.0,0.0,0.1,0.1,0.1,0.1,0.1,0.2,0.2,0.0,0.6,0.1,0.2,0.0,0.0,0.2,0.0,0.1,0.7,0.3,0.3,0.3,0.1,0.0,0.1,0.1,0.2,0.1,0.9,0.7,0.0,0.2,0.3,0.1,0.0,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.2,0.2,0.0,0.2,...,0.0,0.0,0.0,0.1,0.1,0.2,0.2,0.0,0.0,0.0,0.2,0.6,0.3,0.7,0.4,0.0,0.1,0.1,0.1,0.1,0.0,0.1,0.3,0.0,0.0,0.4,0.1,0.0,0.0,0.6,0.1,0.0,0.1,0.0,0.1,0.7,0.1,0.1,0.1,0.1,0.0,0.0,0.0,0.0,0.1,0.1,0.0,0.0,0.1,0.0
1,Jumanji (1995),2,0.0,0.0,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.0,0.0,0.0,0.0,0.2,0.6,0.4,0.5,0.5,0.2,0.2,0.1,0.2,0.1,0.1,1.0,0.1,0.0,0.1,0.1,0.1,0.1,0.3,0.1,0.0,0.0,0.0,0.0,0.0,0.1,0.1,0.1,0.2,0.0,0.3,...,0.0,0.0,0.0,0.0,0.0,0.2,0.1,0.0,0.1,0.1,0.1,0.4,0.2,0.2,0.3,0.0,0.1,0.0,0.0,0.1,0.0,0.0,0.2,0.0,0.0,0.4,0.1,0.0,0.0,0.3,0.1,0.0,0.0,0.2,0.4,0.2,0.2,0.1,0.0,0.1,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.1,0.0
2,Grumpier Old Men (1995),3,0.0,0.1,0.0,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.1,0.1,0.0,0.0,0.0,0.0,0.1,0.2,0.1,0.3,0.2,0.1,0.0,0.0,0.1,0.1,0.1,0.3,0.1,0.0,0.0,0.1,0.1,0.1,0.3,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,...,0.0,0.0,0.0,0.1,0.1,0.1,0.1,0.0,0.0,0.0,0.1,0.1,0.2,0.1,0.0,0.0,0.1,0.0,0.0,0.1,0.0,0.0,0.1,0.4,0.0,0.2,0.1,0.0,0.0,0.1,0.1,0.0,0.0,0.0,0.0,0.2,0.0,0.4,0.0,0.1,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.1,0.0


In [8]:
# Perform one-hot encoding on the data
movies_metadata = pd.get_dummies(tats, columns = tats.columns[2:])
movies_metadata.head(3)

Unnamed: 0,title,movieId,007_0.0,007_0.1,007_0.2,007_0.3,007_0.4,007_0.5,007_0.6,007_0.7,007_0.8,007_0.9,007_1.0,007 (series)_0.0,007 (series)_0.1,007 (series)_0.2,007 (series)_0.3,007 (series)_0.4,007 (series)_0.5,007 (series)_0.6,007 (series)_0.7,007 (series)_0.8,007 (series)_0.9,007 (series)_1.0,18th century_0.0,18th century_0.1,18th century_0.2,18th century_0.3,18th century_0.4,18th century_0.5,18th century_0.6,18th century_0.7,18th century_0.8,18th century_0.9,18th century_1.0,1920s_0.0,1920s_0.1,1920s_0.2,1920s_0.3,1920s_0.4,1920s_0.5,1920s_0.6,1920s_0.7,1920s_0.8,1920s_0.9,1930s_0.0,1930s_0.1,1930s_0.2,1930s_0.3,1930s_0.4,...,writing_0.5,writing_0.6,writing_0.7,writing_0.8,writing_0.9,writing_1.0,wuxia_0.0,wuxia_0.1,wuxia_0.2,wuxia_0.3,wuxia_0.4,wuxia_0.5,wuxia_0.6,wuxia_0.7,wuxia_0.8,wuxia_0.9,wuxia_1.0,wwii_0.0,wwii_0.1,wwii_0.2,wwii_0.3,wwii_0.4,wwii_0.5,wwii_0.6,wwii_0.7,wwii_0.8,wwii_0.9,wwii_1.0,zombie_0.0,zombie_0.1,zombie_0.2,zombie_0.3,zombie_0.4,zombie_0.5,zombie_0.6,zombie_0.7,zombie_0.8,zombie_0.9,zombie_1.0,zombies_0.0,zombies_0.1,zombies_0.2,zombies_0.3,zombies_0.4,zombies_0.5,zombies_0.6,zombies_0.7,zombies_0.8,zombies_0.9,zombies_1.0
0,Toy Story (1995),1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,Jumanji (1995),2,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
2,Grumpier Old Men (1995),3,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0


## User and movie interaction data

In [37]:
# limit the movie metadata to selected features
features = ['userId', 'movieId', 'rating', 'genres']
interactions= movies_and_ratings[features]
interactions.replace('', np.nan, inplace=True)
interactions.head(3)

Unnamed: 0,userId,movieId,rating,genres
0,2,1,3.5,Adventure|Animation|Children|Comedy|Fantasy
1,3,1,4.0,Adventure|Animation|Children|Comedy|Fantasy
2,4,1,3.0,Adventure|Animation|Children|Comedy|Fantasy


In [10]:
# for meaningful pandas_profiling statistics, replace blank cells with NaN
profile = pandas_profiling.ProfileReport(interactions[['userId', 'movieId', 'rating', 'genres']])
profile.to_notebook_iframe()

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

In [38]:
# filtering the interactions data with only the movies that we have tats data
movies = tats['movieId'].tolist()
interactions = interactions[interactions['movieId'].isin(movies)]

In [39]:
# randomly select 9000 users interactions data
interactions_random = interactions[interactions['userId'].isin(
               random.sample(list(interactions['userId'].unique()), k=9000))]
interactions_random.head()

Unnamed: 0,userId,movieId,rating,genres
7,13,1,4.0,Adventure|Animation|Children|Comedy|Fantasy
33,111,1,4.5,Adventure|Animation|Children|Comedy|Fantasy
44,160,1,4.5,Adventure|Animation|Children|Comedy|Fantasy
65,227,1,4.0,Adventure|Animation|Children|Comedy|Fantasy
73,243,1,2.5,Adventure|Animation|Children|Comedy|Fantasy


In [40]:
# on-hot encoding 
user_movie_interaction = pd.pivot_table(interactions_random, index='userId', columns='movieId', values='rating')

# fill missing values with 0
user_movie_interaction = user_movie_interaction.fillna(0)

user_movie_interaction.head(3)

movieId,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,...,200638,200814,200818,200820,200822,200824,200838,200842,200938,201200,201242,201314,201340,201368,201586,201588,201594,201646,201749,201773,201811,202101,202103,202155,202159,202237,202393,202421,202429,202439,202519,202575,202711,202934,203208,203218,203222,203244,203513,203519,204294,204352,204542,204692,204698,205072,205076,205383,205425,206499
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1
13,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
72,0.0,2.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,4.0,5.0,4.0,0.0,0.0,5.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,1.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,4.0,0.0,0.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
92,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [41]:
# split the interaction data into three sets
train_user_movie_interaction = user_movie_interaction.iloc[:3000,:]
dev_user_movie_interaction = user_movie_interaction.iloc[3000:6000,:]
test_user_movie_interaction = user_movie_interaction.iloc[6000:,:]

In [42]:
# Convert dense matrix to sparse matrix for training set
train_user_movie_interaction_csr = csr_matrix(train_user_movie_interaction.values)
train_user_movie_interaction_csr

<3000x13768 sparse matrix of type '<class 'numpy.float64'>'
	with 470248 stored elements in Compressed Sparse Row format>

In [43]:
# Convert dense matrix to sparse matrix for dev set
dev_user_movie_interaction_csr = csr_matrix(dev_user_movie_interaction.values)
dev_user_movie_interaction_csr

<3000x13768 sparse matrix of type '<class 'numpy.float64'>'
	with 464937 stored elements in Compressed Sparse Row format>

In [44]:
# Convert dense matrix to sparse matrix for test set
test_user_movie_interaction_csr = csr_matrix(test_user_movie_interaction.values)
test_user_movie_interaction_csr

<3000x13768 sparse matrix of type '<class 'numpy.float64'>'
	with 450609 stored elements in Compressed Sparse Row format>

In [45]:
# create a user dictionary for lightfm usage purpose
user_id = list(test_user_movie_interaction.index)
user_dict = {}
counter = 0 
for i in user_id:
    user_dict[i] = counter
    counter += 1

# print first 5 items:
for item in list(user_dict)[0:5]:
    print (item, user_dict[item])

108659 0
108705 1
108714 2
108738 3
108743 4


## Movie Data Part Twon

In [46]:
# feature transformation and only use the movies data that interacts with our randomly select users
movies_metadata = movies_metadata.loc[movies_metadata['movieId'].isin(list(user_movie_interaction.columns))]
movies_metadata = movies_metadata.reset_index(drop=True)

In [47]:
# Convert dense matrix to sparse matrix for movie data
movies_metadata_csr = csr_matrix(movies_metadata.drop(['movieId','title'], axis=1).values)
movies_metadata_csr

<13768x12049 sparse matrix of type '<class 'numpy.uint8'>'
	with 15530304 stored elements in Compressed Sparse Row format>

In [48]:
pd.set_option("display.max_colwidth", -1)
pd.DataFrame(movies_metadata_csr).head()

Unnamed: 0,0
0,"(0, 0)\t1\n (0, 11)\t1\n (0, 23)\t1\n (0, 34)\t1\n (0, 44)\t1\n (0, 55)\t1\n (0, 65)\t1\n (0, 76)\t1\n (0, 86)\t1\n (0, 95)\t1\n (0, 111)\t1\n (0, 116)\t1\n (0, 128)\t1\n (0, 137)\t1\n (0, 148)\t1\n (0, 159)\t1\n (0, 168)\t1\n (0, 180)\t1\n (0, 197)\t1\n (0, 204)\t1\n (0, 214)\t1\n (0, 224)\t1\n (0, 233)\t1\n (0, 243)\t1\n (0, 254)\t1\n :\t:\n (0, 11780)\t1\n (0, 11788)\t1\n (0, 11798)\t1\n (0, 11809)\t1\n (0, 11826)\t1\n (0, 11832)\t1\n (0, 11842)\t1\n (0, 11854)\t1\n (0, 11863)\t1\n (0, 11875)\t1\n (0, 11892)\t1\n (0, 11896)\t1\n (0, 11907)\t1\n (0, 11918)\t1\n (0, 11929)\t1\n (0, 11939)\t1\n (0, 11950)\t1\n (0, 11961)\t1\n (0, 11972)\t1\n (0, 11984)\t1\n (0, 11995)\t1\n (0, 12005)\t1\n (0, 12016)\t1\n (0, 12028)\t1\n (0, 12038)\t1"
1,"(0, 0)\t1\n (0, 11)\t1\n (0, 23)\t1\n (0, 34)\t1\n (0, 44)\t1\n (0, 55)\t1\n (0, 65)\t1\n (0, 75)\t1\n (0, 85)\t1\n (0, 96)\t1\n (0, 106)\t1\n (0, 116)\t1\n (0, 127)\t1\n (0, 137)\t1\n (0, 148)\t1\n (0, 157)\t1\n (0, 168)\t1\n (0, 181)\t1\n (0, 196)\t1\n (0, 205)\t1\n (0, 216)\t1\n (0, 226)\t1\n (0, 234)\t1\n (0, 245)\t1\n (0, 254)\t1\n :\t:\n (0, 11780)\t1\n (0, 11788)\t1\n (0, 11798)\t1\n (0, 11809)\t1\n (0, 11823)\t1\n (0, 11832)\t1\n (0, 11842)\t1\n (0, 11853)\t1\n (0, 11865)\t1\n (0, 11878)\t1\n (0, 11887)\t1\n (0, 11897)\t1\n (0, 11907)\t1\n (0, 11917)\t1\n (0, 11929)\t1\n (0, 11940)\t1\n (0, 11950)\t1\n (0, 11961)\t1\n (0, 11972)\t1\n (0, 11984)\t1\n (0, 11994)\t1\n (0, 12005)\t1\n (0, 12016)\t1\n (0, 12028)\t1\n (0, 12038)\t1"
2,"(0, 0)\t1\n (0, 12)\t1\n (0, 22)\t1\n (0, 34)\t1\n (0, 43)\t1\n (0, 54)\t1\n (0, 64)\t1\n (0, 75)\t1\n (0, 84)\t1\n (0, 95)\t1\n (0, 105)\t1\n (0, 116)\t1\n (0, 127)\t1\n (0, 137)\t1\n (0, 148)\t1\n (0, 157)\t1\n (0, 168)\t1\n (0, 180)\t1\n (0, 192)\t1\n (0, 202)\t1\n (0, 214)\t1\n (0, 223)\t1\n (0, 233)\t1\n (0, 243)\t1\n (0, 253)\t1\n :\t:\n (0, 11778)\t1\n (0, 11788)\t1\n (0, 11798)\t1\n (0, 11809)\t1\n (0, 11821)\t1\n (0, 11832)\t1\n (0, 11842)\t1\n (0, 11853)\t1\n (0, 11863)\t1\n (0, 11874)\t1\n (0, 11887)\t1\n (0, 11895)\t1\n (0, 11910)\t1\n (0, 11917)\t1\n (0, 11929)\t1\n (0, 11940)\t1\n (0, 11950)\t1\n (0, 11961)\t1\n (0, 11972)\t1\n (0, 11984)\t1\n (0, 11994)\t1\n (0, 12005)\t1\n (0, 12016)\t1\n (0, 12028)\t1\n (0, 12038)\t1"
3,"(0, 0)\t1\n (0, 11)\t1\n (0, 22)\t1\n (0, 33)\t1\n (0, 44)\t1\n (0, 54)\t1\n (0, 64)\t1\n (0, 75)\t1\n (0, 84)\t1\n (0, 95)\t1\n (0, 105)\t1\n (0, 115)\t1\n (0, 127)\t1\n (0, 137)\t1\n (0, 148)\t1\n (0, 157)\t1\n (0, 168)\t1\n (0, 180)\t1\n (0, 191)\t1\n (0, 202)\t1\n (0, 216)\t1\n (0, 226)\t1\n (0, 233)\t1\n (0, 243)\t1\n (0, 254)\t1\n :\t:\n (0, 11778)\t1\n (0, 11788)\t1\n (0, 11798)\t1\n (0, 11809)\t1\n (0, 11821)\t1\n (0, 11832)\t1\n (0, 11844)\t1\n (0, 11853)\t1\n (0, 11863)\t1\n (0, 11874)\t1\n (0, 11886)\t1\n (0, 11895)\t1\n (0, 11916)\t1\n (0, 11917)\t1\n (0, 11932)\t1\n (0, 11940)\t1\n (0, 11950)\t1\n (0, 11961)\t1\n (0, 11972)\t1\n (0, 11985)\t1\n (0, 11995)\t1\n (0, 12005)\t1\n (0, 12016)\t1\n (0, 12028)\t1\n (0, 12038)\t1"
4,"(0, 0)\t1\n (0, 12)\t1\n (0, 22)\t1\n (0, 33)\t1\n (0, 44)\t1\n (0, 55)\t1\n (0, 64)\t1\n (0, 75)\t1\n (0, 84)\t1\n (0, 95)\t1\n (0, 105)\t1\n (0, 115)\t1\n (0, 127)\t1\n (0, 137)\t1\n (0, 148)\t1\n (0, 157)\t1\n (0, 168)\t1\n (0, 181)\t1\n (0, 192)\t1\n (0, 203)\t1\n (0, 214)\t1\n (0, 223)\t1\n (0, 233)\t1\n (0, 243)\t1\n (0, 253)\t1\n :\t:\n (0, 11779)\t1\n (0, 11788)\t1\n (0, 11798)\t1\n (0, 11809)\t1\n (0, 11822)\t1\n (0, 11832)\t1\n (0, 11843)\t1\n (0, 11853)\t1\n (0, 11863)\t1\n (0, 11874)\t1\n (0, 11886)\t1\n (0, 11895)\t1\n (0, 11910)\t1\n (0, 11917)\t1\n (0, 11929)\t1\n (0, 11940)\t1\n (0, 11950)\t1\n (0, 11961)\t1\n (0, 11972)\t1\n (0, 11985)\t1\n (0, 11994)\t1\n (0, 12005)\t1\n (0, 12016)\t1\n (0, 12028)\t1\n (0, 12038)\t1"


In [49]:
# create a movie dictionary for lightfm usage purpose
item_dict ={}
df = movies_metadata[['movieId', 'title']]

for i in range(df.shape[0]):
    item_dict[df.loc[i,'movieId']] = df.loc[i,'title']

# print first 5 items:
for item in list(item_dict)[0:5]:
    print (item, item_dict[item])

1 Toy Story (1995)
2 Jumanji (1995)
3 Grumpier Old Men (1995)
4 Waiting to Exhale (1995)
5 Father of the Bride Part II (1995)


## LightFM

In [50]:
def movie_recommendation_user(model, user_movie_interaction, user_id, user_dict, 
                               item_dict,threshold = 0,nrec_items = 5, show = True):
    """
    input the model, user id and relevant data,
    the method will return a recommendation movie list for that user
    """
    # model prediction for user_id
    n_users, n_items = user_movie_interaction.shape
    user_x = user_dict[user_id]
    scores = pd.Series(model.predict(user_x,np.arange(n_items), item_features=movies_metadata_csr))
    scores.index = user_movie_interaction.columns
    scores = list(pd.Series(scores.sort_values(ascending=False).index))
    
    # known items for user_id
    known_items = list(pd.Series(user_movie_interaction.loc[user_id,:] \
                                 [user_movie_interaction.loc[user_id,:] > threshold].index).sort_values(ascending=False))
    
    # recommended items for user_id
    scores = [x for x in scores if x not in known_items]
    return_score_list = scores[0:nrec_items]
    known_items = list(pd.Series(known_items).apply(lambda x: item_dict[x]))
    scores = list(pd.Series(return_score_list).apply(lambda x: item_dict[x]))
    
    
    if show == True:
        print ("User: " + str(user_id))
        print("Known Likes:")
        counter = 1
        for i in known_items:
            print(str(counter) + '- ' + i)
            counter+=1
            
        print("\n Recommended Items:")
        counter = 1
        for i in scores:
            print(str(counter) + '- ' + i)
            counter+=1

            
def rmse_user(model, user_movie_interaction, user_id, user_dict):
    """
    caculate the root mean square error of the model's prediciton for a user
    """
    n_users, n_items = user_movie_interaction.shape
    user_x = user_dict[user_id]
    predict_scores = pd.Series(model.predict(user_x,np.arange(n_items)))
    known_scores = user_movie_interaction.loc[user_id,:]
    score = np.sqrt(mean_squared_error(known_scores, predict_scores))
    return score
            
    
def sample_hyperparameters():
    """
    Yield possible hyperparameter choices.
    """

    while True:
        yield {
            "no_components": np.random.randint(16, 64),
            "learning_schedule": np.random.choice(["adagrad", "adadelta"]),
            "loss": np.random.choice(["bpr", "warp", "warp-kos", "logistic"]),
            "learning_rate": np.random.exponential(0.05),
            "item_alpha": np.random.exponential(1e-8),
            "user_alpha": np.random.exponential(1e-8),
            "max_sampled": np.random.randint(5, 15),
            "num_epochs": np.random.randint(5, 50),
        }


def random_search(train, dev, num_samples=10, num_threads=8):
    """
    Sample random hyperparameters, fit a LightFM model, and evaluate it
    on the test set.

    Return a generator of (auc_score, hyperparameter dict, fitted model)
    """

    for hyperparams in itertools.islice(sample_hyperparameters(), num_samples):
        num_epochs = hyperparams.pop("num_epochs")

        model = LightFM(**hyperparams)
        model.fit(train, epochs=num_epochs, num_threads=num_threads, verbose=True)

        score = auc_score(model, dev, num_threads=num_threads).mean()
        pre_k = precision_at_k(model, dev, num_threads=num_threads).mean()
        hyperparams["num_epochs"] = num_epochs
        print("Score {} {} at {}".format(score, pre_k, hyperparams))

        yield (score, hyperparams, model)

## Hyperparameter Tuning through Random Search

In [607]:
# hyperparameter tuning
train = train_user_movie_interaction_csr
dev = dev_user_movie_interaction_csr
# random search to find the best hyperparameter set
(score, hyperparams, model) = max(random_search(train, dev), key=lambda x: x[0])
print("Best score {} at {}".format(score, hyperparams))

Epoch: 100%|██████████| 16/16 [00:09<00:00,  1.62it/s]
Epoch:   0%|          | 0/38 [00:00<?, ?it/s]

Score 0.8178206086158752 0.2556000053882599 at {'no_components': 37, 'learning_schedule': 'adadelta', 'loss': 'warp', 'learning_rate': 0.024772539402324045, 'item_alpha': 1.1973499541502058e-09, 'user_alpha': 1.4631006451592346e-08, 'max_sampled': 14, 'num_epochs': 16}


Epoch: 100%|██████████| 38/38 [00:36<00:00,  1.03it/s]
Epoch:   0%|          | 0/27 [00:00<?, ?it/s]

Score 0.7686077356338501 0.21606667339801788 at {'no_components': 49, 'learning_schedule': 'adadelta', 'loss': 'warp-kos', 'learning_rate': 0.06846821063084071, 'item_alpha': 3.073648747341085e-08, 'user_alpha': 3.0972066025610927e-09, 'max_sampled': 13, 'num_epochs': 38}


Epoch: 100%|██████████| 27/27 [00:19<00:00,  1.42it/s]
Epoch:   0%|          | 0/28 [00:00<?, ?it/s]

Score 0.7998589873313904 0.2269333302974701 at {'no_components': 41, 'learning_schedule': 'adadelta', 'loss': 'warp-kos', 'learning_rate': 0.004451455532153441, 'item_alpha': 4.350517866834001e-09, 'user_alpha': 1.0014323478637922e-08, 'max_sampled': 8, 'num_epochs': 27}


Epoch: 100%|██████████| 28/28 [00:20<00:00,  1.37it/s]
Epoch:   0%|          | 0/49 [00:00<?, ?it/s]

Score 0.8059843182563782 0.23029999434947968 at {'no_components': 28, 'learning_schedule': 'adadelta', 'loss': 'warp-kos', 'learning_rate': 0.017022294859726925, 'item_alpha': 3.461132691045516e-09, 'user_alpha': 1.8174815710291554e-08, 'max_sampled': 6, 'num_epochs': 28}


Epoch: 100%|██████████| 49/49 [00:27<00:00,  1.79it/s]
Epoch:   0%|          | 0/17 [00:00<?, ?it/s]

Score 0.6253770589828491 0.35136669874191284 at {'no_components': 29, 'learning_schedule': 'adagrad', 'loss': 'bpr', 'learning_rate': 0.002426664484704832, 'item_alpha': 9.373892054134351e-09, 'user_alpha': 1.579320519932206e-08, 'max_sampled': 9, 'num_epochs': 49}


Epoch: 100%|██████████| 17/17 [00:10<00:00,  1.63it/s]
Epoch:   0%|          | 0/15 [00:00<?, ?it/s]

Score 0.8040328621864319 0.22966666519641876 at {'no_components': 32, 'learning_schedule': 'adadelta', 'loss': 'warp-kos', 'learning_rate': 0.019879578639396808, 'item_alpha': 4.874671824649497e-09, 'user_alpha': 5.36655635806943e-09, 'max_sampled': 10, 'num_epochs': 17}


Epoch: 100%|██████████| 15/15 [00:05<00:00,  2.60it/s]
Epoch:   0%|          | 0/7 [00:00<?, ?it/s]

Score 0.8798730373382568 0.3408333361148834 at {'no_components': 21, 'learning_schedule': 'adagrad', 'loss': 'warp', 'learning_rate': 0.031167453655458057, 'item_alpha': 1.962836129078276e-09, 'user_alpha': 3.962488066362012e-09, 'max_sampled': 8, 'num_epochs': 15}


Epoch: 100%|██████████| 7/7 [00:03<00:00,  1.85it/s]
Epoch:   0%|          | 0/8 [00:00<?, ?it/s]

Score 0.9329580068588257 0.3989333510398865 at {'no_components': 39, 'learning_schedule': 'adadelta', 'loss': 'logistic', 'learning_rate': 0.004048890976213637, 'item_alpha': 7.589964394247697e-10, 'user_alpha': 5.118811409673707e-09, 'max_sampled': 11, 'num_epochs': 7}


Epoch: 100%|██████████| 8/8 [00:06<00:00,  1.21it/s]
Epoch:   0%|          | 0/6 [00:00<?, ?it/s]

Score 0.8299961090087891 0.25380000472068787 at {'no_components': 26, 'learning_schedule': 'adadelta', 'loss': 'warp-kos', 'learning_rate': 0.027060389337015895, 'item_alpha': 1.598144584599774e-08, 'user_alpha': 1.6366057194082508e-11, 'max_sampled': 9, 'num_epochs': 8}


Epoch: 100%|██████████| 6/6 [00:03<00:00,  1.87it/s]


Score 0.9295874238014221 0.4227333068847656 at {'no_components': 35, 'learning_schedule': 'adagrad', 'loss': 'warp', 'learning_rate': 0.0024262452707052764, 'item_alpha': 1.0928881159828255e-08, 'user_alpha': 2.3950863646959315e-08, 'max_sampled': 5, 'num_epochs': 6}
Best score 0.9329580068588257 at {'no_components': 39, 'learning_schedule': 'adadelta', 'loss': 'logistic', 'learning_rate': 0.004048890976213637, 'item_alpha': 7.589964394247697e-10, 'user_alpha': 5.118811409673707e-09, 'max_sampled': 11, 'num_epochs': 7}


## Testing (use the best performance hyperparameter set of logistic loss function)

In [55]:
# use the best parameter set we get from the random search to fit into our lightfm model
num_epochs = 35
dic = {'no_components': 60, 'learning_schedule': 'adagrad', 'max_sampled': 8,
       'loss': 'logistic', 'learning_rate': 0.036339577444888554, 
       'item_alpha': 3.291179269631395e-09, 'user_alpha': 6.085003729779842e-09}

model = LightFM(**dic)

model = model.fit(train_user_movie_interaction_csr,
                  epochs=num_epochs,
                  num_threads=8,
                  verbose=True)

Epoch: 100%|██████████| 35/35 [00:23<00:00,  1.50it/s]


In [56]:
# calculate the rmse for our model with test set
sum = 0
for i, key in enumerate(user_dict.keys()):
    user_id = key
    user_rmse = rmse_user(model, test_user_movie_interaction, user_id, user_dict)
    sum += user_rmse
print("The RMSE of the model on the test data is {}".format(sum/len(user_dict)))

The RMSE of the model on the test data is 2.79440707215629


In [57]:
# caculate the auc score of our model with test set
score = auc_score(model, test_user_movie_interaction_csr, num_threads=8).mean()
print("AUC Score is {} at {}".format(score, dic))

AUC Score is 0.9351380467414856 at {'no_components': 60, 'learning_schedule': 'adagrad', 'max_sampled': 8, 'loss': 'logistic', 'learning_rate': 0.036339577444888554, 'item_alpha': 3.291179269631395e-09, 'user_alpha': 6.085003729779842e-09}


In [664]:
# select a user id to do the recommendation
user_id = 0
for i, key in enumerate(user_dict.keys()):
    if i==124:
        user_id=key

In [611]:
# provide recommendation for that user
movie_recommendation_user(model, test_user_movie_interaction, user_id, user_dict, item_dict)

User: 110368
Known Likes:
1- Die Hard (1988)
2- Lone Star (1996)
3- Eraser (1996)
4- Independence Day (a.k.a. ID4) (1996)
5- Twister (1996)
6- Rock, The (1996)
7- Wallace & Gromit: The Best of Aardman Animation (1996)
8- Truth About Cats & Dogs, The (1996)
9- Mission: Impossible (1996)
10- Courage Under Fire (1996)
11- Fargo (1996)
12- Pinocchio (1940)
13- Beauty and the Beast (1991)
14- Snow White and the Seven Dwarfs (1937)
15- Silence of the Lambs, The (1991)
16- Dances with Wolves (1990)
17- Terminator 2: Judgment Day (1991)
18- Aladdin (1992)
19- Sleepless in Seattle (1993)
20- Shadowlands (1993)
21- Searching for Bobby Fischer (1993)
22- Schindler's List (1993)
23- Remains of the Day, The (1993)
24- Piano, The (1993)
25- Philadelphia (1993)
26- Mrs. Doubtfire (1993)
27- Much Ado About Nothing (1993)
28- Executive Decision (1996)
29- Man Without a Face, The (1993)
30- Jurassic Park (1993)
31- In the Name of the Father (1993)
32- In the Line of Fire (1993)
33- Fugitive, The (1993)


## Testing (use the best performance hyperparameter set of warp loss function )

In [58]:
# use the best parameter set we get from the random search to fit into our lightfm model
num_epochs = 15
dic = {'no_components': 21, 'learning_schedule': 'adagrad', 'loss': 'warp', 'learning_rate': 0.09,
       'item_alpha': 1.962836129078276e-09, 'user_alpha': 3.962488066362012e-09, 'max_sampled': 8}

model = LightFM(**dic)

model = model.fit(train_user_movie_interaction_csr,
                  epochs=num_epochs,
                  num_threads=8,
                  verbose=True)

Epoch: 100%|██████████| 15/15 [00:06<00:00,  2.15it/s]


In [59]:
# select a user id to do the recommendation
user_id = 0
for i, key in enumerate(user_dict.keys()):
    if i==124:
        user_id=key

In [60]:
# provide recommendation for that user
movie_recommendation_user(model, test_user_movie_interaction, user_id, user_dict, item_dict)

User: 110911
Known Likes:
1- X2: X-Men United (2003)
2- Lord of the Rings: The Fellowship of the Ring, The (2001)
3- Shrek (2001)
4- Almost Famous (2000)
5- Gone in 60 Seconds (2000)
6- Cider House Rules, The (1999)
7- For Your Eyes Only (1981)
8- Bowfinger (1999)
9- Eyes Wide Shut (1999)
10- Notting Hill (1999)
11- Mummy, The (1999)
12- Enemy of the State (1998)
13- Few Good Men, A (1992)
14- Lethal Weapon 2 (1989)
15- As Good as It Gets (1997)
16- Good, the Bad and the Ugly, The (Buono, il brutto, il cattivo, Il) (1966)
17- English Patient, The (1996)
18- Nutty Professor, The (1996)
19- Independence Day (a.k.a. ID4) (1996)
20- Striptease (1996)
21- Dances with Wolves (1990)
22- Sleepless in Seattle (1993)
23- Casper (1995)
24- Bridges of Madison County, The (1995)
25- Pocahontas (1995)
26- Copycat (1995)

 Recommended Items:
1- American Hardcore (2006)
2- Dog Eat Dog (2016)
3- Shoot the Moon (1982)
4- The Dawn Wall (2018)
5- The Wrecking Crew (2008)


In [61]:
# caculate the auc score of our model with test set
score = auc_score(model, test_user_movie_interaction_csr, num_threads=8).mean()
print("AUC Score is {} at {}".format(score, dic))

AUC Score is 0.867211103439331 at {'no_components': 21, 'learning_schedule': 'adagrad', 'loss': 'warp', 'learning_rate': 0.09, 'item_alpha': 1.962836129078276e-09, 'user_alpha': 3.962488066362012e-09, 'max_sampled': 8}


## Make Recommedation for User 9171

In [86]:
# generate a test set which inclues user 9171 interaction data
interactions_inclue_9171 = interactions[interactions['userId'].isin(range(7000,10000))]
user_movie_interaction_inclue_9171 = pd.pivot_table(interactions_inclue_9171, index='userId', columns='movieId', values='rating')
user_movie_interaction_inclue_9171 = user_movie_interaction_inclue_9171.fillna(0)

In [87]:
# create a user dictionary for lightfm usage purpose
user_id = list(user_movie_interaction_inclue_9171.index)
user_dict = {}
counter = 0 
for i in user_id:
    user_dict[i] = counter
    counter += 1

In [88]:
# make movie recommendation for user 9171
movie_recommendation_user(model, user_movie_interaction_inclue_9171, 9171, user_dict, item_dict, nrec_items=10)

User: 9171
Known Likes:
1- Bird Box (2018)
2- Bohemian Rhapsody (2018)
3- A Star Is Born (2018)
4- BlacKkKlansman (2018)
5- Deadpool 2 (2018)
6- Incredibles 2 (2018)
7- Wild Wild Country (2018)
8- Hereditary (2018)
9- Annihilation (2018)
10- Coco (2017)
11- Three Billboards Outside Ebbing, Missouri (2017)
12- Blade Runner 2049 (2017)
13- It (2017)
14- Dunkirk (2017)
15- Planet Earth II (2016)
16- Get Out (2017)
17- Passengers (2016)
18- Whiplash (2013)
19- La La Land (2016)
20- Arrival (2016)
21- Moonlight
22- Planet Earth (2006)
23- The Handmaiden (2016)
24- The Nice Guys (2016)
25- 10 Cloverfield Lane (2016)
26- Big Short, The (2015)
27- Spotlight (2015)
28- Love (2015)
29- Sicario (2015)
30- Ghost in the Shell 2.0 (2008)
31- Inside Out (2015)
32- The Jinx: The Life and Deaths of Robert Durst (2015)
33- Mad Max: Fury Road (2015)
34- It Follows (2014)
35- Chappie (2015)
36- The Imitation Game (2014)
37- Ex Machina (2015)
38- Nightcrawler (2014)
39- Fury (2014)
40- Guardians of the Gal