In [1]:
from surprise import SVD
from surprise import accuracy
from surprise import Reader
from surprise import Dataset
from surprise import BaselineOnly
import pickle
import pandas as pd
import numpy as np
from utils import *

In [4]:
# Load the ratings table. 'Unnamed: 0' is dropped
# (presumably a saved index column from an earlier to_csv — TODO confirm).
data = pd.read_csv('data/amazon_video.csv').drop(columns='Unnamed: 0')

# Re-encode raw user ids as consecutive integers, numbered in order of first
# appearance. Bracket access is used so the column lookup can never be
# shadowed by a DataFrame attribute of the same name.
usr_idx_dict = {usr: idx for idx, usr in enumerate(data['user'].unique())}
data['user'] = data['user'].map(usr_idx_dict)

# Same re-encoding for item ids.
item_idx_dict = {item: idx for idx, item in enumerate(data['item'].unique())}
data['item'] = data['item'].map(item_idx_dict)

# Dataset.load_from_df reads the frame positionally as (user, item, rating),
# so select the columns explicitly instead of relying on the CSV column order.
# Ground-truth ratings:
data_truth = data[['user', 'item', 'rating']]

# Same layout, but the rating column carries 'senti_rating_finetune' instead.
data_bert = (
    data[['user', 'item', 'senti_rating_finetune']]
    .rename(columns={'senti_rating_finetune': 'rating'})
)

In [5]:
# Ratings are declared to lie on a 1.0-5.0 scale.
reader = Reader(rating_scale=(1.0, 5.0))
train_data = Dataset.load_from_df(df=data_truth, reader=reader)

# Build the trainset from every rating in data_truth (no hold-out split).
train_sr = train_data.build_full_trainset()

# Convert that same trainset back into a list of rating tuples for .test().
# NOTE(review): because this is the training data itself, any metric computed
# on train_sr_eval is an in-sample figure.
train_sr_eval = train_sr.build_testset()

In [6]:
# Bias-only baseline model, with biases estimated by ALS over 200 epochs.
bsl_options = {
    "method": "als",
    "n_epochs": 200,
}
bias_baseline = BaselineOnly(bsl_options=bsl_options)
bias_baseline.fit(train_sr)

Estimating biases using als...


<surprise.prediction_algorithms.baseline_only.BaselineOnly at 0x146d1b67a400>

In [8]:
# Score the fitted baseline on train_sr_eval (the rating tuples rebuilt from
# the training set), giving one prediction per known rating.
# NOTE(review): the name `predictions` is reused later for the SVD model's output.
predictions = bias_baseline.test(train_sr_eval)


In [9]:
# Print and return the RMSE of the current `predictions`.
# NOTE(review): these predictions were made on train_sr_eval, which is derived
# from the training set, so this is an in-sample error.
accuracy.rmse(predictions)

RMSE: 1.0077


1.0076546278669694

In [10]:
# SVD matrix-factorisation model: 50 latent factors, 200 epochs, a single
# learning rate (lr_all) and regularisation term (reg_all) for all parameters.
# SVD initialises its factors randomly; random_state pins that initialisation
# so a fresh "Restart & Run All" reproduces the same fitted model and metrics.
algo = SVD(
    n_factors=50,
    n_epochs=200,
    lr_all=0.005,
    reg_all=0.02,
    random_state=42,
)
algo.fit(train_sr)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x146d1b67a940>

In [54]:
# Score the fitted SVD model on train_sr_eval (rating tuples rebuilt from the
# training set). This overwrites the baseline's `predictions`.
predictions = algo.test(train_sr_eval)

In [55]:
# Print and return the RMSE of the SVD predictions.
# NOTE(review): `predictions` was computed on train_sr_eval, which is built
# from the same trainset the model was fitted on, so this is an in-sample
# error and not an estimate of generalisation performance.
accuracy.rmse(predictions)

RMSE: 0.0882


0.08818761004254107

In [56]:
# Tabulate the predictions; the displayed columns are
# uid, iid, r_ui (true rating), est (estimated rating) and details.
predDf = pd.DataFrame(predictions)
# Preview the first rows only.
predDf.head()

Unnamed: 0,uid,iid,r_ui,est,details
0,0,0,5.0,4.978862,{'was_impossible': False}
1,1,1,5.0,4.933323,{'was_impossible': False}
2,2,2,4.0,4.019636,{'was_impossible': False}
3,2,20,4.0,4.025328,{'was_impossible': False}
4,3,3,5.0,4.90686,{'was_impossible': False}


In [44]:
# get_top_n is not defined in this notebook; it presumably arrives through
# `from utils import *` and presumably keeps up to n=500 of the
# highest-estimated items per user — TODO confirm against utils.
# NOTE(review): this cell's recorded execution count (44) is lower than that of
# the cell producing the SVD `predictions` (54); re-run top to bottom to make
# sure the intended predictions are the ones used here.
top_n = get_top_n(predictions, n = 500)

In [48]:
# precision_recall_at_k is not defined in this notebook; it presumably arrives
# through `from utils import *`. It returns two mappings (their .values() are
# averaged in the next cell), presumably keyed by user.
# Presumably an item counts as relevant when its rating is >= threshold (4)
# and the top k=500 estimates per user are considered — TODO confirm in utils.
precisions, recalls = precision_recall_at_k(predictions, k=500, threshold=4)

In [49]:
# Unweighted mean of the per-key precision and recall values
# (keys are presumably users — confirm against precision_recall_at_k).
mean_precision = sum(precisions.values()) / len(precisions)
mean_recall = sum(recalls.values()) / len(recalls)

print(mean_precision)
print(mean_recall)

0.7968540934042443
0.7860422651666593
