In [9]:
from surprise import accuracy, Dataset, SVD
from surprise.model_selection import train_test_split
from collections import defaultdict

# Load the movielens-100k dataset (download it if needed),
data = Dataset.load_builtin("ml-100k")

# sample random trainset and testset
# test set is made of 25% of the ratings.
trainset, testset = train_test_split(data, test_size=0.25)

# We'll use the famous SVD algorithm.
algo = SVD()

# Train the algorithm on the trainset, and predict ratings for the testset
algo.fit(trainset)
predictions = algo.test(testset)

# Then compute RMSE
accuracy.rmse(predictions)

RMSE: 0.9428


0.9427510686806129

In [4]:
model = algo.fit(trainset)
predictions = model.test(testset)
print(predictions)

[Prediction(uid='608', iid='287', r_ui=3.0, est=3.2301561427626413, details={'was_impossible': False}), Prediction(uid='707', iid='86', r_ui=4.0, est=3.843709831438971, details={'was_impossible': False}), Prediction(uid='311', iid='186', r_ui=3.0, est=3.771417847809072, details={'was_impossible': False}), Prediction(uid='119', iid='544', r_ui=2.0, est=3.777444634511057, details={'was_impossible': False}), Prediction(uid='236', iid='58', r_ui=2.0, est=3.4169050832821806, details={'was_impossible': False}), Prediction(uid='290', iid='629', r_ui=3.0, est=3.0810033143237203, details={'was_impossible': False}), Prediction(uid='692', iid='1040', r_ui=2.0, est=2.4105697796831356, details={'was_impossible': False}), Prediction(uid='388', iid='333', r_ui=5.0, est=4.360095273963477, details={'was_impossible': False}), Prediction(uid='92', iid='559', r_ui=3.0, est=2.886694155014684, details={'was_impossible': False}), Prediction(uid='913', iid='234', r_ui=4.0, est=3.5298035256111806, details={'wa

In [10]:
def get_top_n(predictions, n=10):
    """Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendation to output for each user. Default
            is 10.

    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    """

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

In [11]:
predictions = algo.test(testset)
top_n = get_top_n(predictions, n=10)
# Print the recommended items for each user
for uid, user_ratings in top_n.items():
    print(uid, [iid for (iid, _) in user_ratings])

324 ['127', '275', '475', '270', '286', '690', '879', '276', '508', '1094']
221 ['50', '318', '64', '23', '128', '181', '684', '847', '129', '461']
144 ['172', '316', '180', '165', '181', '285', '14', '190', '423', '197']
85 ['64', '657', '172', '513', '498', '528', '604', '606', '57', '663']
271 ['357', '64', '318', '173', '199', '9', '215', '98', '169', '963']
202 ['318', '96', '1', '204', '269', '516', '179']
868 ['156', '169', '480', '153', '191', '382', '64', '173', '427', '475']
498 ['192', '474', '134', '12', '127', '515', '182', '136', '98', '603']
580 ['181', '258', '250', '121', '15', '1', '125', '343', '249', '323']
95 ['216', '180', '657', '208', '520', '423', '198', '178', '168', '736']
98 ['523', '659', '116', '517', '47', '163', '88', '629', '322']
805 ['100', '127', '169', '317', '179', '200', '93', '180', '475', '715']
561 ['171', '197', '223', '515', '185', '479', '135', '496', '116', '484']
523 ['114', '285', '514', '523', '56', '116', '194', '189', '181', '242']
416