The penultimate algorithm I will be testing is Slope One, described as a simple
but effective collaborative filtering model. It is neither a nearest-neighbour nor a matrix factorisation algorithm, and it has no tunable parameters.

In [5]:
from surprise import Dataset, SlopeOne
from surprise.accuracy import rmse, mae
from surprise.model_selection import train_test_split
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import time


In [2]:
# Carve ml-100k into a training portion (set A) and an unseen test portion (set B).

# Seed both RNGs up front so the shuffle below gives the same split every run.
random.seed(2001)
np.random.seed(2001)

data100k = Dataset.load_builtin('ml-100k')
raw_ratings = data100k.raw_ratings
random.shuffle(raw_ratings)  # in-place shuffle of the dataset's rating list

# 80-20 threshold: the first 80% becomes set A, the remainder is held out as set B
cutoff = int(0.8 * len(raw_ratings))
A_raw, B_raw = raw_ratings[:cutoff], raw_ratings[cutoff:]

# from here on the dataset object only exposes the set A ratings
data100k.raw_ratings = A_raw

In [10]:
# Train Slope One on ml-100k set A and evaluate it on the held-out set B.
algo = SlopeOne()

trainset = data100k.build_full_trainset()

# Time with perf_counter(): it is a monotonic, high-resolution clock meant for
# measuring elapsed intervals, whereas time.time() follows the wall clock and
# can jump if the system time is adjusted mid-run.
start_fit = time.perf_counter()
algo.fit(trainset)
fit_100k = time.perf_counter() - start_fit

testset = data100k.construct_testset(B_raw)  # testset is now the set B
start_predict = time.perf_counter()
predictions_100k = algo.test(testset)
test_100k = time.perf_counter() - start_predict

# rmse() prints its own "RMSE: ..." text, so the label is emitted without a
# newline to keep both on one output line.
print("Unbiased accuracy on 100k=,", end=" ")
rmse_100k = rmse(predictions_100k)
print("Fit time for 100k = " + str(fit_100k))
print("Test time for 100k =" + str(test_100k))

Unbiased accuracy on 100k=, RMSE: 0.9438
Fit time for 100k = 1.3098831176757812
Test time for 100k =3.9256973266601562


In [12]:
# Train and evaluate Slope One on ml-1m using a 75/25 train/test split.
data1m = Dataset.load_builtin('ml-1m')

# Pin random_state so the split is the same on every run. Without it the split
# draws from the global NumPy RNG, whose state depends on which cells ran before.
# 2001 matches the seed used for the ml-100k split.
trainset, testset = train_test_split(data1m, test_size=0.25, random_state=2001)

# `algo` is the SlopeOne instance created in the ml-100k cell; it is refit here
# on the ml-1m training split.
# perf_counter() is used for timing because it is monotonic and high-resolution,
# unlike the wall-clock time.time().
start_fit = time.perf_counter()
algo.fit(trainset)
fit_1m = time.perf_counter() - start_fit

start_predict = time.perf_counter()
predictions_1m = algo.test(testset)
test_1m = time.perf_counter() - start_predict

# rmse() prints its own "RMSE: ..." text, so the label is emitted without a
# newline to keep both on one output line.
print("Unbiased accuracy on 1m=,", end=" ")
rmse_1m = rmse(predictions_1m)
print("Fit time for 1m = " + str(fit_1m))
print("Test time for 1m =" + str(test_1m))


Unbiased accuracy on 1m=, RMSE: 0.9066
Fit time for 1m = 14.612466096878052
Test time for 1m =83.45496582984924
