# Comparison between the recommender systems

This notebook compares different recommender systems in terms of various metrics. We benchmark the
algorithms on the MovieLens 100k and MovieLens 1M datasets.

Copyright 2023 Bernardo C. Rodrigues

See COPYING file for license details

In [None]:
# Setup notebook
import random
import numpy as np
import evaluation.plot as plot

# Load IPython's autoreload extension.
%load_ext autoreload

# Mode 2: reload all modules every time a cell is executed, so edits to the imported
# modules are picked up without restarting the kernel.
%autoreload 2

# Apply the plot template customization provided by evaluation.plot.
plot.customize_default_template()

# Seed both Python's and NumPy's global random number generators with the same fixed value.
seed = 0
random.seed(seed)
np.random.seed(seed)

In [None]:
# Load the MovieLens 1M folds and hand them to resolve_folds; the loader's return value is
# unpacked into positional arguments.
# NOTE(review): resolve_folds presumably materializes the folds produced by the loader —
# confirm against dataset.movie_lens.
from dataset.movie_lens import load_ml_1m_folds, resolve_folds

folds = resolve_folds(*load_ml_1m_folds())

In [None]:
# Threshold that defines whether a rating is considered relevant in the metrics computation.
RELEVANCE_THRESHOLD = 4.0

# Number of top recommendations to be considered in the metrics computation (e.g. precision@k).
NUMBER_OF_TOP_RECOMMENDATIONS = 20

# Number of times each experiment should be repeated. Keep in mind that an experiment already
# involves a 5-fold cross-validation.
REPEAT = 1

In [None]:
# Define the recommender variations to be used in the experiments.
from surprise.prediction_algorithms import (
    SVD,
    KNNWithMeans,
    CoClustering,
)
from recommenders import grecond_recommender, binaps_based_recommenders, common
import evaluation.threads as threads

# Named recommender configurations handed to `benchmark` as its second argument.
# NOTE(review): the name suggests these are the variations evaluated concurrently by the
# benchmark's worker threads — confirm against evaluation.plot.benchmark.
#
# Every entry is a threads.RecommenderVariation(name, recommender). The last entry used to
# be a bare (name, recommender) tuple; it is wrapped like the others so that all elements
# of the list have the same type.
parallel_recommender_variations = [
    threads.RecommenderVariation(
        "GreConD+IBCAkNN",
        grecond_recommender.GreConDBiAKNNRecommender(
            grecond_coverage=0.1,
            dataset_binarization_threshold=3.0,
            minimum_bicluster_sparsity=None,
            minimum_bicluster_coverage=None,
            minimum_bicluster_relative_size=None,
            knn_type="item",
            user_binarization_threshold=1.0,
            number_of_top_k_biclusters=None,
            knn_k=50,
        ),
    ),
    # Item-based kNN baseline (user_based=False) with cosine similarity.
    threads.RecommenderVariation(
        "IBkNN",
        KNNWithMeans(sim_options={"name": "cosine", "user_based": False}, verbose=False),
    ),
    threads.RecommenderVariation(
        "GreConD+UBCAkNN",
        grecond_recommender.GreConDBiAKNNRecommender(
            grecond_coverage=0.4,
            dataset_binarization_threshold=5.0,
            minimum_bicluster_sparsity=None,
            minimum_bicluster_coverage=None,
            minimum_bicluster_relative_size=None,
            knn_type="user",
            user_binarization_threshold=1.0,
            number_of_top_k_biclusters=None,
            knn_k=200,
        ),
    ),
    # User-based kNN baseline (user_based=True) with cosine similarity.
    threads.RecommenderVariation(
        "UBkNN",
        KNNWithMeans(sim_options={"name": "cosine", "user_based": True}, verbose=False),
    ),
    threads.RecommenderVariation("SVD", SVD(verbose=False)),
    threads.RecommenderVariation("CoClustering", CoClustering(verbose=False)),
    threads.RecommenderVariation(
        "GreConD+MF+UBkNN",
        grecond_recommender.GreConDKNNRecommender(
            grecond_coverage=1.0,
            dataset_binarization_threshold=1.0,
            knn_k=30,
            knn_distance_strategy=common.get_cosine_similarity_matrix,
        ),
    ),
]

# Named recommender configurations handed to `benchmark` as its third argument.
# NOTE(review): the name suggests these are evaluated one at a time rather than by the
# benchmark's worker threads — confirm against evaluation.plot.benchmark.
sequential_recommender_variations = [
    threads.RecommenderVariation(
        "BinaPs+IBCAkNN",
        binaps_based_recommenders.BinaPsKNNRecommender(
            # Training / dataset binarization settings.
            epochs=1000,
            dataset_binarization_threshold=2.5,
            # Bicluster selection settings.
            number_of_top_k_biclusters=5,
            minimum_bicluster_sparsity=None,
            minimum_bicluster_coverage=None,
            minimum_bicluster_relative_size=None,
            # Neighbourhood settings.
            knn_type="item",
            knn_k=10,
            user_binarization_threshold=0.0,
        ),
    ),
]

In [None]:
from evaluation.plot import benchmark
from evaluation.threads import generic_benchmark_thread

# Run the benchmark over the loaded folds with both lists of recommender variations, the
# repetition count, the relevance threshold and the top-k cutoff defined above, using
# generic_benchmark_thread as the per-run worker. The return value is kept in `results`.
# NOTE(review): `thread_count=10` presumably caps the number of worker threads — confirm
# against evaluation.plot.benchmark.
results = benchmark(
    folds,
    parallel_recommender_variations,
    sequential_recommender_variations,
    REPEAT,
    RELEVANCE_THRESHOLD,
    NUMBER_OF_TOP_RECOMMENDATIONS,
    generic_benchmark_thread,
    thread_count=10
)

In [None]:
import json

# Persist the benchmark results as indented JSON. The encoding is given explicitly so the
# file does not depend on the platform's locale default.
with open('comparison_1m.results', 'w', encoding='utf-8') as file_object:
    json.dump(results, file_object, indent=4)

In [None]:
import json


# Plot results
from evaluation.plot import plot_metric_box_plot, get_result_table
from evaluation.threads import GENERIC_METRIC_NAMES

# Read the results back from 'comparison_1m.results' instead of relying on the in-memory
# `results` variable. The encoding is given explicitly so reading does not depend on the
# platform's locale default.
with open('comparison_1m.results', 'r', encoding='utf-8') as file_object:
    results = json.load(file_object)

# For every metric, print its name, its result table and a blank separator line.
for metric_name in GENERIC_METRIC_NAMES:
    # The box plot call is currently disabled; uncomment to run it for each metric as well.
    # plot_metric_box_plot(metric_name, results)
    print(metric_name)
    print(get_result_table(metric_name, results))
    print()