In [43]:
import sys
import surprise

from recommenders.utils.timer import Timer
# from recommenders.datasets import movielens
from recommenders.datasets.python_splitters import python_random_split
from recommenders.evaluation.python_evaluation import (
    rmse,
    mae,
    rsquared,
    exp_var,
    map_at_k,
    ndcg_at_k,
    precision_at_k,
    recall_at_k,
    get_top_k_items,
)
from recommenders.models.surprise.surprise_utils import (
    predict,
    compute_ranking_predictions,
)
from recommenders.utils.notebook_utils import store_metadata
import pandas as pd
from sklearn.metrics import r2_score

print(f"System version: {sys.version}")
print(f"Surprise version: {surprise.__version__}")

System version: 3.11.4 (v3.11.4:d2340ef257, Jun  6 2023, 19:15:51) [Clang 13.0.0 (clang-1300.0.29.30)]
Surprise version: 1.1.4


In [6]:
# Top k items to recommend
TOP_K = 10
root = 'ml-latest-small'

data = pd.read_csv(f'{root}/ratings.csv').drop('timestamp', axis=1)
global_mean = data['rating'].mean()
print(f'Global mean: {global_mean:.2f}')
data.head()

Global mean: 3.50


Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [7]:
train, test = python_random_split(data, 0.75)

In [8]:
# 'reader' is being used to get rating scale (for MovieLens, the scale is [1, 5]).
# 'rating_scale' parameter can be used instead for the later version of surprise lib:
# https://github.com/NicolasHug/Surprise/blob/master/surprise/dataset.py
train_set = surprise.Dataset.load_from_df(
    train, reader=surprise.Reader("ml-100k")
).build_full_trainset()
train_set

<surprise.trainset.Trainset at 0x286b35510>

In [9]:
svd = surprise.SVD(random_state=0, n_factors=200, n_epochs=30, verbose=True)

with Timer() as train_time:
    svd.fit(train_set)

print(f"Took {train_time.interval} seconds for training.")

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Processing epoch 20
Processing epoch 21
Processing epoch 22
Processing epoch 23
Processing epoch 24
Processing epoch 25
Processing epoch 26
Processing epoch 27
Processing epoch 28
Processing epoch 29
Took 1.949329083028715 seconds for training.


In [12]:
predictions = predict(svd, test, usercol="userId", itemcol="movieId")
predictions.head()

Unnamed: 0,userId,movieId,prediction
0,432,77866,3.61015
1,288,474,3.488584
2,599,4351,2.80158
3,42,2987,4.157055
4,75,1610,3.803387


In [30]:
with Timer() as test_time:
    all_predictions = compute_ranking_predictions(
        svd, test, usercol="userID", itemcol="itemID", remove_seen=True
    )

print(f"Took {test_time.interval} seconds for prediction.")

Took 20.752840833971277 seconds for prediction.


In [31]:
all_predictions.head()

Unnamed: 0,userID,itemID,prediction
25209,432,474,3.787548
25210,432,4351,3.563464
25211,432,2987,3.731993
25212,432,1610,3.571164
25213,432,177,3.244931


In [40]:
test.columns = ['userID', 'itemID', 'rating']
predictions.columns = ['userID', 'itemID', 'prediction']
test

Unnamed: 0,userID,itemID,rating
67037,432,77866,4.5
42175,288,474,3.0
93850,599,4351,3.0
6187,42,2987,4.0
12229,75,1610,4.0
...,...,...,...
76051,479,135,1.0
35045,234,2414,3.0
14383,91,1968,3.0
46656,306,69406,5.0


In [45]:
predictions

Unnamed: 0,userID,itemID,prediction
0,432,77866,3.610150
1,288,474,3.488584
2,599,4351,2.801580
3,42,2987,4.157055
4,75,1610,3.803387
...,...,...,...
25204,479,135,3.399514
25205,234,2414,3.745184
25206,91,1968,3.900460
25207,306,69406,3.265097


In [46]:
ys = pd.merge(predictions, test, on=['userID', 'itemID'])
y_true = ys['rating']
y_pred = ys['prediction']

r2_score(y_true, y_pred)

0.2826361878496322

In [41]:
eval_rmse = rmse(test, predictions)
eval_mae = mae(test, predictions)
eval_rsquared = rsquared(test, predictions)
eval_exp_var = exp_var(test, predictions)

eval_map = map_at_k(test, all_predictions, col_prediction="prediction", k=TOP_K)
eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction="prediction", k=TOP_K)
eval_precision = precision_at_k(
    test, all_predictions, col_prediction="prediction", k=TOP_K
)
eval_recall = recall_at_k(test, all_predictions, col_prediction="prediction", k=TOP_K)


print(
    "RMSE:\t\t%f" % eval_rmse,
    "MAE:\t\t%f" % eval_mae,
    "rsquared:\t%f" % eval_rsquared,
    "exp var:\t%f" % eval_exp_var,
    sep="\n",
)

print("----")

print(
    "MAP:\t\t%f" % eval_map,
    "NDCG:\t\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
    sep="\n",
)

  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


RMSE:		0.886274
MAE:		0.679173
rsquared:	0.282636
exp var:	0.282714
----
MAP:		0.000000
NDCG:		0.000000
Precision@K:	0.000000
Recall@K:	0.000000


In [42]:
# Record results for tests - ignore this cell
store_metadata("rmse", eval_rmse)
store_metadata("mae", eval_mae)
store_metadata("rsquared", eval_rsquared)
store_metadata("exp_var", eval_exp_var)
store_metadata("map", eval_map)
store_metadata("ndcg", eval_ndcg)
store_metadata("precision", eval_precision)
store_metadata("recall", eval_recall)
store_metadata("train_time", train_time.interval)
store_metadata("test_time", test_time.interval)