In [1]:
import pandas as pd
import numpy as np
from sklearn.decomposition import NMF

In [2]:
# Read the raw ratings CSV, then keep only the three columns used below:
# the user identifier, the title and the rating that user gave it.
df = pd.read_csv('../../Data Files/Raw Data/users_ratings.csv')
df_filtered = df.loc[:, ['UserID', 'Title', 'User_Rating']]
df_filtered

Unnamed: 0,UserID,Title,User_Rating
0,ur117926588,A Nightmare on Elm Street 2: Freddy's Revenge,5
1,ur117926588,Elvis,8
2,ur117926588,Where Eagles Dare,9
3,ur117926588,A Nightmare on Elm Street,9
4,ur117926588,Eagle Eye,5
...,...,...,...
15590,ur4532636,Jackie Brown,10
15591,ur4532636,Beauty and the Beast,6
15592,ur4532636,Dirty Pretty Things,9
15593,ur4532636,"Easy Riders, Raging Bulls: How the Sex, Drugs ...",7


In [6]:
# Keep only the (UserID, Title) pairs that occur exactly once. When a pair
# appears more than once, every one of its rows is discarded, so each
# user/title combination left in df_rating carries a single rating.
key_columns = ['UserID', 'Title']

# Rows missing either key cannot be matched to a user/title pair; drop them up
# front (the string-concatenation approach used previously dropped them too).
df_rating = df_filtered.dropna(subset=key_columns).copy()

# Retained for backward compatibility only. It is NOT used to detect repeats:
# concatenating without a separator can map two different (UserID, Title)
# pairs onto the same string and wrongly discard both.
df_rating['combined'] = df_rating['UserID'] + df_rating['Title']

# keep=False marks every member of a repeated group, not just the later ones.
is_repeated = df_rating.duplicated(subset=key_columns, keep=False)
df_rating = df_rating[~is_repeated]

In [45]:
# Reshape the long ratings into a wide table: one row per UserID, one column
# per Title, cells holding User_Rating. Pairs without a rating become 0.
pivoted_table = (
    df_rating
    .pivot(index='UserID', columns='Title', values='User_Rating')
    .fillna(0)
)

In [48]:
# Show the pivoted UserID x Title rating table for inspection.
pivoted_table

Title,#Alive,'71,'G' Men,'Pimpernel' Smith,*batteries not included,...All the Marbles,10,10 Cloverfield Lane,10 Rillington Place,10 Things I Hate About You,...,Zootopia,Zorba the Greek,Zumbo's Just Desserts,[REC] 3: Genesis,[REC] 4: Apocalypse,[Rec]²,eXistenZ,iBoy,"tick, tick... BOOM!",xXx
UserID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ur117926588,9.0,9.0,0.0,0.0,7.0,5.0,7.0,9.0,8.0,0.0,...,9.0,0.0,7.0,0.0,0.0,0.0,8.0,6.0,0.0,4.0
ur15298231,0.0,6.0,7.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0
ur17646017,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ur1994077,0.0,0.0,0.0,0.0,0.0,0.0,6.0,5.0,0.0,0.0,...,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ur22171966,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,6.0,...,10.0,0.0,0.0,3.0,5.0,8.0,5.0,0.0,0.0,0.0
ur4532636,0.0,8.0,6.0,0.0,0.0,0.0,4.0,6.0,0.0,0.0,...,7.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [46]:
# Factorise the rating table with NMF (NNDSVD initialisation, coordinate
# descent solver) into a per-row factor matrix and a per-column component
# matrix, then multiply the two back together to get the reconstructed table.
# NOTE(review): n_components is the number of rows in pivoted_table, so the
# factorisation rank equals the row count — confirm a lower rank is not
# intended.
model = NMF(
    n_components=len(pivoted_table),
    init='nndsvd',
    solver='cd',
    l1_ratio=0.0,
    max_iter=500,
    random_state=0,
)
users_matrix = model.fit_transform(pivoted_table)
ratings_matrix = model.components_
predictions = users_matrix @ ratings_matrix

In [52]:
# Absolute gap between the rating table and its reconstruction. Cells at or
# below the threshold are blanked to NaN, and rows left with no value at all
# are removed.
# NOTE(review): the gap is taken over every cell of pivoted_table, with no
# distinction between cells that held a rating and cells that did not —
# confirm this is intended.
threshold = 0.001
recommendations = (pivoted_table - predictions).abs()
filtered = recommendations.where(recommendations > threshold).dropna(how='all')
filtered

Title,#Alive,'71,'G' Men,'Pimpernel' Smith,*batteries not included,...All the Marbles,10,10 Cloverfield Lane,10 Rillington Place,10 Things I Hate About You,...,Zootopia,Zorba the Greek,Zumbo's Just Desserts,[REC] 3: Genesis,[REC] 4: Apocalypse,[Rec]²,eXistenZ,iBoy,"tick, tick... BOOM!",xXx
UserID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ur117926588,,,,,,,,,,0.001857,...,,,,,0.001547,0.002475,,,,
ur4532636,,,,,,,,,,,...,,,,,,,,,,


In [86]:
# Build, for every row of `filtered`, the list of column labels with the
# highest positive scores (at most `to_reco` of them), keyed by the row label.
all_recos = {}
to_reco = 5

for index, row in filtered.iterrows():
    # Rank once and reuse the result for both the printout and the stored
    # list. `row > 0` also discards NaN cells, since NaN > 0 is False.
    # .iloc makes the "first to_reco entries" slice explicitly positional.
    top_scores = row[row > 0].sort_values(ascending=False).iloc[:to_reco]
    print(top_scores)
    all_recos[index] = top_scores.index.tolist()

all_recos

Title
The Fault in Our Stars    0.003094
Good Will Hunting         0.003094
Training Day              0.003094
Apocalypse Now            0.003094
Les Misérables            0.003094
Name: ur117926588, dtype: float64
Title
The Fault in Our Stars    0.001113
Léon: The Professional    0.001113
They Live                 0.001113
Inside Out                0.001113
Blade Runner              0.001113
Name: ur4532636, dtype: float64


{'ur117926588': ['The Fault in Our Stars',
  'Good Will Hunting',
  'Training Day',
  'Apocalypse Now',
  'Les Misérables'],
 'ur4532636': ['The Fault in Our Stars',
  'Léon: The Professional',
  'They Live',
  'Inside Out',
  'Blade Runner']}