In [1]:
from surprise import NMF
from surprise import Dataset
from surprise import accuracy

In [2]:
# Load the movielens-100k dataset (download it if needed),
data = Dataset.load_builtin('ml-100k')

## NMF

In [3]:
# Retrieve the trainset.
trainset1 = data.build_full_trainset()
testset1 = trainset1.build_anti_testset()
nmf = NMF()
nmf.fit(trainset1)
predictions1 = nmf.test(testset1)
accuracy.rmse(predictions1)

RMSE: 0.9807


0.9807313211703795

In [4]:
from collections import defaultdict

In [5]:
def precision_recall_at_k(predictions, k=10, threshold=3.5):
    '''Return precision and recall at k metrics for each user.'''

    # First map the predictions to each user.
    user_est_true = defaultdict(list)
    for uid, _, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))

    precisions = dict()
    recalls = dict()
    for uid, user_ratings in user_est_true.items():

        # Sort user ratings by estimated value
        user_ratings.sort(key=lambda x: x[0], reverse=True)

        # Number of relevant items
        n_rel = sum((true_r >= threshold) for (_, true_r) in user_ratings)

        # Number of recommended items in top k
        n_rec_k = sum((est >= threshold) for (est, _) in user_ratings[:k])

        # Number of relevant and recommended items in top k
        n_rel_and_rec_k = sum(((true_r >= threshold) and (est >= threshold))
                              for (est, true_r) in user_ratings[:k])

        # Precision@K: Proportion of recommended items that are relevant
        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 1

        # Recall@K: Proportion of relevant items that are recommended
        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 1

    return precisions, recalls

In [6]:
from surprise.model_selection import KFold
kf = KFold(n_splits=5)

In [7]:
for trainset, testset in kf.split(data):
    nmf.fit(trainset)
    predictions_original = nmf.test(testset)
    precisions_original, recalls_original = precision_recall_at_k(predictions_original, k=5, threshold=4)

In [8]:
# Precision and recall can then be averaged over all users
print("Precision : ", sum(prec for prec in precisions_original.values()) / len(precisions_original))
print("Recall : ", sum(rec for rec in recalls_original.values()) / len(recalls_original))

Precision :  0.8436946510804106
Recall :  0.26071784156952593


## SVD

In [9]:
from surprise import SVD
# Retrieve the trainset.
trainset2 = data.build_full_trainset()
testset2 = trainset1.build_anti_testset()
svd = SVD()
svd.fit(trainset)
predictions2 = svd.test(testset)
accuracy.rmse(predictions2)

RMSE: 0.9373


0.9372695611691683

In [10]:
for trainset, testset in kf.split(data):
    svd.fit(trainset)
    predictions_original = svd.test(testset)
    precisions_original, recalls_original = precision_recall_at_k(predictions_original, k=5, threshold=4)

In [11]:
# Precision and recall can then be averaged over all users
print("Precision : ", sum(prec for prec in precisions_original.values()) / len(precisions_original))
print("Recall : ", sum(rec for rec in recalls_original.values()) / len(recalls_original))

Precision :  0.8735595616825728
Recall :  0.26859364376782935


## ALS

In [12]:
trainset3 = data.build_full_trainset()
testset3 = trainset1.build_anti_testset()
from surprise import BaselineOnly
bsl_options = {'method': 'als','n_epochs': 1000}
als = BaselineOnly(bsl_options=bsl_options)
als.fit(trainset3)
predictions3 = als.test(testset3)
accuracy.rmse(predictions3)

Estimating biases using als...
RMSE: 0.5169


0.5169349211295566

In [13]:
for trainset, testset in kf.split(data):
    als.fit(trainset)
    predictions_original = als.test(testset)
    precisions_original, recalls_original = precision_recall_at_k(predictions_original, k=5, threshold=4)

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...


In [14]:
# Precision and recall can then be averaged over all users
print("Precision : ", sum(prec for prec in precisions_original.values()) / len(precisions_original))
print("Recall : ", sum(rec for rec in recalls_original.values()) / len(recalls_original))

Precision :  0.8861641896673742
Recall :  0.22524227660379637
