# Importing Dataset

In [1]:
!pip install scikit-surprise



In [2]:
import datetime
import random
import time

import numpy as np
from surprise import accuracy, Dataset, SVD, KNNBasic, KNNWithMeans
from surprise.model_selection import train_test_split

from surprise.model_selection import cross_validate, KFold
from tabulate import tabulate

In [15]:
data = Dataset.load_builtin("ml-100k")
trainset = data.build_full_trainset()

predictions = {}

algos = (
    SVD(random_state=42),
    KNNBasic(),
    KNNWithMeans()
)

for algo in algos:
    algorithm = algo.__class__.__name__
    algo.fit(trainset)

    testset = trainset.build_anti_testset()

    predictions[algorithm] = algo.test(testset)

    print(f'Algorithm: {algorithm}')
    accuracy.rmse(predictions[algorithm])
    accuracy.mae(predictions[algorithm])
    print()


Algorithm: SVD
RMSE: 0.6077
MAE:  0.4778

Computing the msd similarity matrix...
Done computing similarity matrix.
Algorithm: KNNBasic
RMSE: 0.9181
MAE:  0.6910

Computing the msd similarity matrix...
Done computing similarity matrix.
Algorithm: KNNWithMeans
RMSE: 0.7799
MAE:  0.6101



In [16]:
from collections import defaultdict

def get_top_n(predictions, n=10):
    """Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendation to output for each user. Default
            is 10.

    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    """

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

In [18]:
for algo in algos:
    algorithm = algo.__class__.__name__
    print(f'Computing top 10 recommendations from {algorithm}')

    top_n = get_top_n(predictions[algorithm], n=10)

    # Print the recommended items for each user
    #for uid, user_ratings in top_n.items():
        #print(uid, [iid for (iid, _) in user_ratings])



Computing top 10 recommendations from SVD
Computing top 10 recommendations from KNNBasic
Computing top 10 recommendations from KNNWithMeans


In [19]:
from surprise.model_selection import KFold

def precision_recall_at_k(predictions, k=10, threshold=3.5):
    """Return precision and recall at k metrics for each user"""

    # First map the predictions to each user.
    user_est_true = defaultdict(list)
    for uid, _, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))

    precisions = dict()
    recalls = dict()
    for uid, user_ratings in user_est_true.items():

        # Sort user ratings by estimated value
        user_ratings.sort(key=lambda x: x[0], reverse=True)

        # Number of relevant items
        n_rel = sum((true_r >= threshold) for (_, true_r) in user_ratings)

        # Number of recommended items in top k
        n_rec_k = sum((est >= threshold) for (est, _) in user_ratings[:k])

        # Number of relevant and recommended items in top k
        n_rel_and_rec_k = sum(
            ((true_r >= threshold) and (est >= threshold))
            for (est, true_r) in user_ratings[:k]
        )

        # Precision@K: Proportion of recommended items that are relevant
        # When n_rec_k is 0, Precision is undefined. We here set it to 0.

        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 0

        # Recall@K: Proportion of relevant items that are recommended
        # When n_rel is 0, Recall is undefined. We here set it to 0.

        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 0

    return precisions, recalls

for algo in algos:
    algorithm = algo.__class__.__name__
    print(f'Algorithm: {algorithm} at K = 5')

    precisions, recalls = precision_recall_at_k(predictions[algorithm], k=5, threshold=3.5)

    # Precision and recall can then be averaged over all users
    print(sum(prec for prec in precisions.values()) / len(precisions))
    print(sum(rec for rec in recalls.values()) / len(recalls))
    print()

Algorithm: SVD at K = 5
0.9946977730646872
0.0031640276571887078

Algorithm: KNNBasic at K = 5
1.0
0.003187843341595252

Algorithm: KNNWithMeans at K = 5
0.9989395546129375
0.0031775303530262224

