In [26]:
import os
import pickle
import sys
from io import StringIO

import boto3
import numpy as np
import pandas as pd
import surprise
from sklearn.model_selection import train_test_split
from surprise import SVD, NMF, KNNBaseline
from surprise import accuracy

In [2]:
# Add the relative '../src' directory to the module search path (at position 1),
# then import the project's recommender class.
# Presumably Collaborative_recommender.py lives in ../src — verify against the repo layout.
sys.path.insert(1, '../src')
from Collaborative_recommender import CollaborativeRecommender

In [3]:
# Enable IPython's autoreload extension in mode 2, so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
# Reload the autoreload extension.
# NOTE(review): the extension is already loaded by `%load_ext autoreload` earlier in the notebook, so this cell looks redundant.
%reload_ext autoreload

In [5]:
# S3 client; used at the end of the notebook to upload the pickled recommender.
s3 = boto3.client('s3')

In [6]:
# Read the review table straight from S3, using the first CSV column as the index.
reviews_csv_uri = "s3://recommender-system-amazon/df_all_15_text.csv"
df_all_15 = pd.read_csv(reviews_csv_uri, index_col=[0])

In [7]:
# Preview the first five rows of the loaded table.
df_all_15.head(5)

Unnamed: 0,fit,title,also_buy,image_x,price,asin,overall,reviewerID,reviewText,item_text,title_nlp
0,,Black Mountain Products Resistance Band Set wi...,"['1612431712', 'B01AVDVHTI', 'B002YQUP7Q', 'B0...",['https://images-na.ssl-images-amazon.com/imag...,$17.32,7245456313,5.0,A1J5VYG8O0YEMB,work advertis work advertis,black mountain,black mountain product resist band set door an...
1,,Black Mountain Products Resistance Band Set wi...,"['1612431712', 'B01AVDVHTI', 'B002YQUP7Q', 'B0...",['https://images-na.ssl-images-amazon.com/imag...,$17.32,7245456313,4.0,AYMDRRCOPKTCF,four star product arriv prompt describ,black mountain,black mountain product resist band set door an...
2,,Black Mountain Products Resistance Band Set wi...,"['1612431712', 'B01AVDVHTI', 'B002YQUP7Q', 'B0...",['https://images-na.ssl-images-amazon.com/imag...,$17.32,7245456313,4.0,A3AX86YSUHLELL,great resist band top qualiti okay resist band...,black mountain,black mountain product resist band set door an...
3,,Black Mountain Products Resistance Band Set wi...,"['1612431712', 'B01AVDVHTI', 'B002YQUP7Q', 'B0...",['https://images-na.ssl-images-amazon.com/imag...,$17.32,7245456313,5.0,A12MJJT1RL2S7K,perfect transport resist band perfect cross fi...,black mountain,black mountain product resist band set door an...
4,,Black Mountain Products Resistance Band Set wi...,"['1612431712', 'B01AVDVHTI', 'B002YQUP7Q', 'B0...",['https://images-na.ssl-images-amazon.com/imag...,$17.32,7245456313,5.0,A33BKACRGR3H38,awesom great band crosstrain dont like lift gy...,black mountain,black mountain product resist band set door an...


In [8]:
# (rows, columns) of the loaded table.
df_all_15.shape

(227509, 11)

In [9]:
# Column names available in the loaded table.
df_all_15.columns

Index(['fit', 'title', 'also_buy', 'image_x', 'price', 'asin', 'overall',
       'reviewerID', 'reviewText', 'item_text', 'title_nlp'],
      dtype='object')

In [10]:
# Keep only the (user, item, rating) columns used for collaborative filtering.
collab_columns = ['reviewerID', 'asin', 'overall']
df_collab = df_all_15[collab_columns]

In [11]:
# Preview the user / item / rating triples.
df_collab.head()

Unnamed: 0,reviewerID,asin,overall
0,A1J5VYG8O0YEMB,7245456313,5.0
1,AYMDRRCOPKTCF,7245456313,4.0
2,A3AX86YSUHLELL,7245456313,4.0
3,A12MJJT1RL2S7K,7245456313,5.0
4,A33BKACRGR3H38,7245456313,5.0


In [12]:
# (rows, columns) of the ratings frame.
df_collab.shape

(227509, 3)

## Using Surprise to develop a collaborative filter (NMF-based method)

In [13]:
# Surprise reader configured for a 1-5 rating scale.
reader = surprise.Reader(rating_scale=(1,5))
# Build the project's recommender from the ratings frame, the reader and the NMF algorithm class.
# NOTE(review): how CollaborativeRecommender uses these arguments is defined outside this notebook — confirm in ../src.
recom = CollaborativeRecommender(df_collab,reader,NMF)

Use grid search to find the best parameters for the model

In [15]:
# Candidate hyper-parameter values handed to the recommender's grid search.
param_grid = dict(
    n_factors=[8, 15, 23, 30, 37, 42],
    n_epochs=[10, 50, 70],
)
# Run the search (cv=3), then fit the recommender.
# NOTE(review): presumably fit() uses the best parameters found by grid() — confirm in CollaborativeRecommender.
recom.grid(param_grid, cv=3)
recom.fit()


Error (RMSE) of the best model and its predictions

In [17]:
# pred() returns a predictions frame and a single score.
# NOTE(review): the score printed as "accuracy" is an RMSE (the cell output shows `RMSE: 0.3959`), so lower is better.
df_pred, acc = recom.pred()

print(f'accuracy of the best model: {acc}')

RMSE: 0.3959
accuracy of the best model: 0.3959284557005485


Prediction matrix

In [18]:
# Add per-user and per-item columns computed by the recommender's helpers.
# Presumably Iu = number of items rated by the user and Ui = number of users who rated the item — verify against get_Iu / get_Ui.
df_pred['Iu'] = df_pred['uid'].apply(recom.get_Iu)
df_pred['Ui'] = df_pred['iid'].apply(recom.get_Ui)
df_pred

Unnamed: 0,uid,iid,r_ui,est,details,err,Iu,Ui
0,A1J5VYG8O0YEMB,7245456313,5.0,5.000000,{'was_impossible': False},0.000000,5,176
1,A1J5VYG8O0YEMB,B001RWUZ0U,4.0,4.558899,{'was_impossible': False},0.558899,5,60
2,A1J5VYG8O0YEMB,B004RDP2I6,4.0,4.454279,{'was_impossible': False},0.454279,5,19
3,A1J5VYG8O0YEMB,B00MAJKX9M,5.0,5.000000,{'was_impossible': False},0.000000,5,62
4,A1J5VYG8O0YEMB,B00TOKD51S,4.0,4.540833,{'was_impossible': False},0.540833,5,32
...,...,...,...,...,...,...,...,...
227504,A2N7AV15HVN942,B01G81DGH8,4.0,4.482234,{'was_impossible': False},0.482234,1,8
227505,A2WTSMH8S7BEFZ,B01GA3N9MQ,4.0,4.478469,{'was_impossible': False},0.478469,1,62
227506,A2YV0C9IG1KBMQ,B01GPXBOVY,5.0,5.000000,{'was_impossible': False},0.000000,1,32
227507,A2J8289OOT5DE4,B01H8CZ91M,4.0,4.483062,{'was_impossible': False},0.483062,1,66


Utility Matrix

In [19]:
# Utility matrix from the recommender; the displayed frame is indexed by uid with one column per iid.
UM = recom.utility_matrix()
UM

iid,7245456275,7245456313,B00003G4JR,B00004NKIQ,B00004SYN6,B00004T11T,B00004TBLW,B00004YTPE,B000051ZHS,B0000568SY,...,B01HAGT6G0,B01HBACO4G,B01HBPL78K,B01HCI0I6S,B01HCKR9ZE,B01HCU3NTU,B01HD17S3A,B01HE0PMFQ,B01HEYYKLO,B01HHB2HK0
uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A0029274J35Q1MYNKUWO,,,,,,,,,,,...,,,,,,,,,,
A0098869QS7I6WOAODWI,,,,,,,,,,,...,,,,,,,,,,
A0289048PRWFY7ZXQKCD,,,,,,,,,,,...,,,,,,,,,,
A0418008MEWDDIUPHXP7,,,,,,,,,,,...,,,5.0,,,,,,,
A0464351OZXPUPKGI6HO,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AZZ2W9JD10R3H,,,,,,,,,,,...,,,,,,,,,,
AZZCSO2MC725N,,,,,,,,,,,...,,,,,,,,,,
AZZG39P6RDCHL,,,,,,,,,,,...,,,,,,,,,,
AZZKYP9254H32,,,,,,,,,,,...,,,,,,,,,,


Get top n items for a given user

In [20]:
# Example user id to request recommendations for.
UI = "AZZG39P6RDCHL"
# NOTE(review): the displayed result includes rows with an empty predicted_rating — confirm whether get_top_n should drop them.
recom.get_top_n(UI)

Unnamed: 0_level_0,predicted_rating
iid,Unnamed: 1_level_1
B000P3WQ86,5.0
B001ASUH4A,5.0
B004TBMUMM,5.0
B00AT6HZGM,5.0
B00GU3K5AQ,5.0
B00I77PNUA,5.0
B00QBLAQCE,5.0
B01D8WEH14,2.435884
7245456275,
7245456313,


## Pickle the collaborative recommender

In [27]:
# Serialize the recommender object (`recom`) to a local pickle file.
pickle_path = '../../data/recom_colab.pkl'
# Create the target directory first: open(..., 'wb') raises FileNotFoundError
# when the parent directory does not exist (e.g. on a fresh checkout).
os.makedirs(os.path.dirname(pickle_path), exist_ok=True)
with open(pickle_path, 'wb') as f:
    pickle.dump(recom, f)

In [28]:
# List the data directory to confirm the pickle file was written.
ls ../../data


recom_colab.pkl


In [29]:
# Upload the pickled recommender to the project's S3 bucket under the same file name.
bucket_name = 'recommender-system-amazon'
local_file_name = '../../data/recom_colab.pkl'
remote_file_name = 'recom_colab.pkl'

# Positional argument order of upload_file is (Filename, Bucket, Key).
s3.upload_file(local_file_name, bucket_name, remote_file_name)