In [78]:
import numpy as np
import pandas as pd

In [79]:
# Read the two MovieLens tables used below: movie metadata and user ratings.
movies, ratings = (
    pd.read_csv(csv_path)
    for csv_path in ('ml-latest-small/movies.csv', 'ml-latest-small/ratings.csv')
)

In [80]:
# Attach movie metadata (title, genres) to every rating row via the shared
# movieId key, then renumber the rows 0..n-1.
movies_with_ratings = pd.merge(
    left=movies,
    right=ratings,
    how='inner',
    on='movieId',
).reset_index(drop=True)
movies_with_ratings.tail()

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
100831,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy,184,4.0,1537109082
100832,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy,184,3.5,1537109545
100833,193585,Flint (2017),Drama,184,3.5,1537109805
100834,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation,184,3.5,1537110021
100835,193609,Andrew Dice Clay: Dice Rules (1991),Comedy,331,4.0,1537157606


In [81]:
# Render every float with exactly four decimal places in pandas output.
pd.options.display.float_format = '{:.4f}'.format

In [82]:
# Keep only the three fields needed for rating prediction, renamed to
# uid (user), iid (item) and rating, in that column order.
# NOTE(review): the item id is the movie *title*, not movieId. If two distinct
# movieIds share a title they are treated as one item -- TODO confirm titles
# are unique, or key on movieId instead.
dataset = (
    movies_with_ratings[['userId', 'title', 'rating']]
    .rename(columns={'userId': 'uid', 'title': 'iid'})
)
dataset.tail()

Unnamed: 0,uid,iid,rating
100831,184,Black Butler: Book of the Atlantic (2017),4.0
100832,184,No Game No Life: Zero (2017),3.5
100833,184,Flint (2017),3.5
100834,184,Bungo Stray Dogs: Dead Apple (2018),3.5
100835,331,Andrew Dice Clay: Dice Rules (1991),4.0


In [83]:
from surprise import SVDpp, SlopeOne, NormalPredictor
from surprise import Dataset
from surprise import accuracy
from surprise import Reader
from surprise.model_selection import train_test_split

In [84]:
# Summary statistics of the rating column (count, mean, spread, quartiles).
dataset.loc[:, 'rating'].describe()

count   100836.0000
mean         3.5016
std          1.0425
min          0.5000
25%          3.0000
50%          3.5000
75%          4.0000
max          5.0000
Name: rating, dtype: float64

In [85]:
# The rating scale matches the min/max observed in dataset['rating'].
reader = Reader(rating_scale=(0.5, 5.0))
# load_from_df interprets columns by position as (user, item, rating), so
# select them explicitly instead of relying on the frame's construction order.
data = Dataset.load_from_df(dataset[['uid', 'iid', 'rating']], reader)
data

<surprise.dataset.DatasetAutoFolds at 0x1d5042ddd30>

In [86]:
# Hold out 20% of the ratings for evaluation; the fixed random_state keeps the
# same split on every run.
trainset, testset = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

In [87]:
# SVD++ model. The factor initialisation is random, so pin random_state
# (same seed as the train/test split) to make the reported RMSE reproducible
# under Restart & Run All.
algo = SVDpp(random_state=42)
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVDpp at 0x1d33b6a4b00>

In [88]:
# Predict the held-out ratings with the SVD++ model and report the RMSE.
test_pred = algo.test(testset)
accuracy.rmse(predictions=test_pred, verbose=True)

RMSE: 0.8606


0.8606102997670892

In [89]:
# Slope One model trained on the same trainset; fit() hands back the fitted
# estimator, so it can be bound directly and then displayed.
algo2 = SlopeOne().fit(trainset)
algo2

<surprise.prediction_algorithms.slope_one.SlopeOne at 0x1d5042df9e0>

In [90]:
# Predict the held-out ratings with the Slope One model and report the RMSE.
test_pred2 = algo2.test(testset)
accuracy.rmse(predictions=test_pred2, verbose=True)

RMSE: 0.8946


0.8945783911352443

In [91]:
# NormalPredictor model trained on the same trainset; fit() hands back the
# fitted estimator, so it can be bound directly and then displayed.
algo3 = NormalPredictor().fit(trainset)
algo3

<surprise.prediction_algorithms.random_pred.NormalPredictor at 0x1d339f10b30>

In [92]:
# NormalPredictor's predictions are random draws and the class takes no
# random_state argument. Seeding NumPy's global RNG right before predicting
# (the approach the Surprise FAQ gives for reproducible experiments) makes this
# baseline RMSE repeatable, even when the cell is re-run on its own.
np.random.seed(42)
test_pred3 = algo3.test(testset)
accuracy.rmse(test_pred3, verbose=True)

RMSE: 1.4249


1.4249199025656958