In [1]:
import pickle
import numpy as np
import pandas as pd
from collections import defaultdict
from surprise import accuracy, Dataset, SVD, Reader
from surprise.model_selection import cross_validate, train_test_split

In [2]:
# Source: Modified from https://github.com/NicolasHug/Surprise/blob/master/examples/top_n_recommendations.py
def get_top_n_for_user(predictions, user_id, n=10):
    """
    Return the top-N recommendations for a given user from a set of predictions.

    Args:
        predictions (list of Prediction objects): The list of predictions, as returned by the test method of an algorithm.
        user_id (str or int): The user ID for whom to get recommendations.
        n (int): The number of recommendations to return. Default is 10.

    Returns:
        list or str: A sorted list of tuples [(item_id, estimated_rating), ...] of size n for the given user_id,
                     or a message if the user_id is not found.
    """

    # Filter predictions for the given user_id
    user_predictions = [(iid, est) for uid, iid, true_r, est, _ in predictions if uid == user_id]

    # If user_id is not found, return a message
    if not user_predictions:
        return f"User ID {user_id} not found in predictions."

    # Sort by estimated rating in descending order
    user_predictions.sort(key=lambda x: x[1], reverse=True)

    # Return top-N recommendations
    return user_predictions[:n]

In [3]:
# First train an SVD algorithm on the movielens dataset.
data = Dataset.load_builtin("ml-100k")
trainset = data.build_full_trainset()
algo = SVD()
algo.fit(trainset)
pd.DataFrame(cross_validate(algo, data, measures=["RMSE", "MAE"], cv=3, verbose=True))

Dataset ml-100k could not be found. Do you want to download it? [Y/n] 

 Y


Trying to download dataset from https://files.grouplens.org/datasets/movielens/ml-100k.zip...
Done! Dataset ml-100k has been saved to /Users/jchang/.surprise_data/ml-100k
Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9466  0.9457  0.9434  0.9453  0.0014  
MAE (testset)     0.7467  0.7467  0.7444  0.7459  0.0011  
Fit time          0.20    0.20    0.20    0.20    0.00    
Test time         0.08    0.08    0.08    0.08    0.00    


Unnamed: 0,test_rmse,test_mae,fit_time,test_time
0,0.94661,0.746708,0.197019,0.07939
1,0.945741,0.746728,0.202833,0.080176
2,0.943403,0.744368,0.202025,0.08045


In [10]:
print(trainset)

<surprise.trainset.Trainset object at 0x124e83b00>


In [4]:
# Than predict ratings for all pairs (u, i) that are NOT in the training set.
testset = trainset.build_anti_testset()
predictions = algo.test(testset)

In [11]:
uid = str(563)
get_top_n_for_user(predictions, uid, n=10)

[('169', 5),
 ('357', 4.963285101249353),
 ('513', 4.910438308509189),
 ('408', 4.876967607594528),
 ('483', 4.864595039824109),
 ('64', 4.860335106494823),
 ('98', 4.84590936653487),
 ('603', 4.824792657940018),
 ('480', 4.813991803165743),
 ('114', 4.800497330139678)]

In [None]:
with open("predictions.pkl", "wb") as file:
    pickle.dump(predictions, file)