In [1]:
import pandas as pd
from surprise import Reader, Dataset
from surprise import SVD, evaluate
from surprise.model_selection import train_test_split
from surprise.dump import dump
from surprise.dump import load

In [2]:
ratings = pd.read_csv('ratings.csv')
lojas = pd.read_csv('lojas.csv')

In [3]:
ratings['like'] = ratings['like'].astype('float')

In [4]:
ratings = ratings.groupby(['user_id', 'loja_id'])['like'].mean().reset_index()

In [5]:
ratings_dict = {'itemID': list(ratings.loja_id),
                'userID': list(ratings.user_id),
                'rating': list(ratings.like)}
df = pd.DataFrame(ratings_dict)

# A reader is still needed but only the rating_scale param is required.
# The Reader class is used to parse a file containing ratings.
reader = Reader(rating_scale=(0.01, 1.0))

# The columns must correspond to user id, item id and ratings (in that order).
data = Dataset.load_from_df(df[['userID', 'itemID', 'rating']], reader)

In [6]:
model = SVD()
evaluate(model, data, measures=['RMSE'])



Evaluating RMSE of algorithm SVD.

------------
Fold 1
RMSE: 0.1592
------------
Fold 2
RMSE: 0.1584
------------
Fold 3
RMSE: 0.1589
------------
Fold 4
RMSE: 0.1557
------------
Fold 5
RMSE: 0.1555
------------
------------
Mean RMSE: 0.1576
------------
------------


CaseInsensitiveDefaultDict(list,
                           {'rmse': [0.15922607225585372,
                             0.1584415093852951,
                             0.1589267106331199,
                             0.15574291416266917,
                             0.1555347790574407]})

In [7]:
trainset, testset = train_test_split(data, test_size=.25)
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7ff21c286b70>

In [8]:
predictions = model.test(testset)

In [9]:
predictions

[Prediction(uid=16, iid=188, r_ui=0.3333333333333333, est=0.21375329462628553, details={'was_impossible': False}),
 Prediction(uid=10, iid=129, r_ui=0.2222222222222222, est=0.4392177427576084, details={'was_impossible': False}),
 Prediction(uid=16, iid=374, r_ui=0.3333333333333333, est=0.33211649462738607, details={'was_impossible': False}),
 Prediction(uid=20, iid=375, r_ui=0.5, est=0.410503241223928, details={'was_impossible': False}),
 Prediction(uid=6, iid=90, r_ui=0.18181818181818182, est=0.3471776762933619, details={'was_impossible': False}),
 Prediction(uid=8, iid=120, r_ui=0.45454545454545453, est=0.3688380010880731, details={'was_impossible': False}),
 Prediction(uid=20, iid=26, r_ui=0.3076923076923077, est=0.3023220359728474, details={'was_impossible': False}),
 Prediction(uid=2, iid=158, r_ui=0.36363636363636365, est=0.25663880658250343, details={'was_impossible': False}),
 Prediction(uid=8, iid=316, r_ui=0.16666666666666666, est=0.27686497691581474, details={'was_impossible

In [10]:
dump('recomendacao_lojas',algo=model)