# Evaluation of Recommender Systems

This notebook demonstrates how to evaluate the performance of a recommender
system using the implementation from the `evaluation.benchmark` module.

Copyright 2024 Bernardo C. Rodrigues

See COPYING file for license details

In [1]:
# Setup notebook
import random
import numpy as np
import evaluation.plot as plot

# Enable IPython's autoreload extension so that edits to the project modules
# are picked up without restarting the kernel.
%load_ext autoreload

# Mode 2: reload all modules every time a cell is executed
%autoreload 2

# Apply the project's plotting defaults defined in evaluation.plot.
# NOTE(review): presumably this changes global plotting state — confirm in evaluation.plot
plot.customize_default_template()

# Seed both the standard-library and the NumPy global random generators with
# the same value so that stochastic steps further down are repeatable on a
# fresh Restart & Run All.
seed = 0
random.seed(seed)
np.random.seed(seed)

In [2]:
from dataset.common import resolve_folds
from dataset.movie_lens import load_ml_100k_folds

# Load the MovieLens 100k data together with its fold description. Both
# returned values are kept under their own names because resolve_folds needs
# them separately.
# NOTE(review): the "Already downloaded!" message below suggests the dataset is cached locally — confirm in dataset.movie_lens
data, k_fold = load_ml_100k_folds()

# Materialise the folds into an indexable collection; each entry is later
# unpacked as `_, (trainset, testset)`.
folds = resolve_folds(data, k_fold)

Already downloaded!. Nothing to do.


In [3]:
from evaluation.strategies import (
    MAEStrategy,
    RMSEStrategy,
    MicroAveragedRecallStrategy,
    MacroAveragedRecallStrategy,
    RecallAtKStrategy,
    MicroAveragedPrecisionStrategy,
    MacroAveragedPrecisionStrategy,
    PrecisionAtKStrategy,
    CountImpossiblePredictionsStrategy,
)

# Settings shared by the recall/precision measures, named once so the
# different variants cannot silently drift apart when one of them is tuned.
RELEVANCE_THRESHOLD = 4.0  # passed as `threshold`; presumably the rating cut-off for a relevant item — confirm in evaluation.strategies
TOP_K = 20  # passed as `k` to the "@K" measures

# No measures are computed on the training set in this example.
train_measures = []

# Measures computed on the test set, in the order they are reported.
test_measures = [
    # Rating-prediction error.
    MAEStrategy(verbose=False),
    RMSEStrategy(verbose=False),
    # Recall variants.
    MicroAveragedRecallStrategy(threshold=RELEVANCE_THRESHOLD),
    MacroAveragedRecallStrategy(threshold=RELEVANCE_THRESHOLD),
    RecallAtKStrategy(k=TOP_K, threshold=RELEVANCE_THRESHOLD),
    # Precision variants.
    MicroAveragedPrecisionStrategy(threshold=RELEVANCE_THRESHOLD),
    MacroAveragedPrecisionStrategy(threshold=RELEVANCE_THRESHOLD),
    PrecisionAtKStrategy(k=TOP_K, threshold=RELEVANCE_THRESHOLD),
    # Sanity counter for predictions the recommender could not produce.
    # NOTE(review): exact definition of "impossible" lives in evaluation.strategies — confirm
    CountImpossiblePredictionsStrategy(),
]


In [4]:

from evaluation.benchmark import fit_and_score
from surprise.prediction_algorithms import SVD


# Seed the model explicitly. With the default random_state=None, SVD draws its
# initial factors from NumPy's global RNG, so re-running this cell on its own
# would give different numbers depending on how much of that stream had
# already been consumed. Passing the notebook seed makes the cell idempotent.
recommender = SVD(random_state=seed)

# Use the first fold only. Each fold entry carries a leading element that is
# not needed here, followed by the (trainset, testset) pair.
_, (trainset, testset) = folds[0]

# Fit on the training split and score on the test split. The call returns the
# test measurements, the train measurements and the two timings, in that order.
test_measurements, train_measurements, fit_time, test_time = fit_and_score(
    recommender_system=recommender,
    trainset=trainset,
    testset=testset,
    test_measures=test_measures,
    train_measures=train_measures,
)


In [5]:
# Width of the left-hand label column. Every row is formatted through the same
# width so the "|" separators stay aligned without hand-counted padding.
label_width = 30

# One row per test measure: left-aligned name, value with three decimals.
for measure, measurement in test_measurements.items():
    print(f"{measure:<{label_width}}|  {measurement:.3f}")

# Timings returned by fit_and_score, printed in the same two-column layout.
for label, elapsed in (("Fit time:", fit_time), ("Test time:", test_time)):
    print(f"{label:<{label_width}}|  {elapsed:.3f}")


mae                           |  0.750
rmse                          |  0.951
micro_averaged_recall         |  0.383
macro_averaged_recall         |  0.334
recall_at_20                  |  0.372
micro_averaged_precision      |  0.842
macro_averaged_precision      |  0.696
precision_at_20               |  0.770
count_impossible_predictions  |  0.000
Fit time:                     |  0.268
Test time:                    |  0.038
