# Parameter Study of GreConDKNNRecommender2

This notebook studies the relation between the parameters of
GreConDKNNRecommender2 and its performance, and ultimately
suggests the best parameter values.

Copyright 2023 Bernardo C. Rodrigues

See COPYING file for license details

In [None]:
# Setup notebook
import random
import numpy as np
import evaluation.plot as plot

# Load the autoreload extension
%load_ext autoreload

# Set autoreload to reload all modules every time a cell is executed
%autoreload 2

# Call the function to customize the default template
plot.customize_default_template()

# Seed Python's `random` module and NumPy's global RNG with the same fixed value.
seed = 0
random.seed(seed)
np.random.seed(seed)

In [None]:
# Load the MovieLens 100k folds
from dataset.movie_lens import load_ml_100k_folds

# NOTE(review): the meaning of the positional `False` flag is not visible in this notebook --
# confirm against `load_ml_100k_folds` and consider passing it by keyword.
folds = load_ml_100k_folds(False)

In [None]:
# Set global parameters
#
# Each setting is defined exactly once. The recommender-related values are passed, in this order,
# to GreConDKNNRecommender2 by the experiment cells: coverage, DATASET_BINARIZATION_THRESHOLD,
# MINIMUM_PATTERN_BICLUSTER_SPARSITY, USER_BINARIZATION_THRESHOLD, TOP_K_PATTERNS, KNN_K.

# Threshold that controls which weights are going to be considered part of an itemset.
WEIGHTS_BINARIZATION_THRESHOLD = 0.7

# Fraction of the dataset that should be covered by the mined formal concepts before GreConD
# stops. The coverage sweeps in this notebook pass their own values instead of this default.
GRECOND_COVERAGE = 0.1

# Threshold that defines whether a rating is considered positive or not before forwarding it to
# GreConD.
DATASET_BINARIZATION_THRESHOLD = 4.0

# Minimum bicluster sparsity derived from a pattern to be considered in the neighborhood of a user.
MINIMUM_PATTERN_BICLUSTER_SPARSITY = 0.0

# The threshold used to binarize the user when generating the user-item neighborhood.
USER_BINARIZATION_THRESHOLD = 1.0

# How many patterns should be considered when generating the user-item neighborhood. High values
# mean all patterns will be considered.
TOP_K_PATTERNS = 10000

# Number of neighbors to be considered in the KNN algorithm or KNN-based algorithms, i.e. how many
# items should be considered when estimating a rating for a user-item pair.
KNN_K = 20

# Threshold that defines whether a rating is considered relevant in the metrics computation.
RELEVANCE_THRESHOLD = 4.0

# Number of top recommendations to be considered in the metrics computation (e.g. precision@k).
NUMBER_OF_TOP_RECOMMENDATIONS = 20

# Number of times each experiment should be repeated. Keep in mind that an experiment already
# involves a 5 fold cross validation.
REPEAT = 1

In [None]:
# Define the recommender variations to be used in the experiments.

from recommenders import grecond_recommender
import evaluation.threads as threads

# Coverage values swept by this experiment.
grecond_coverages = [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

# Build one variation per coverage value. Only the coverage changes between variations; every
# other constructor argument comes from the global parameters.
recommender_variations = []
for coverage in grecond_coverages:
    recommender = grecond_recommender.GreConDKNNRecommender2(
        coverage,
        DATASET_BINARIZATION_THRESHOLD,
        MINIMUM_PATTERN_BICLUSTER_SPARSITY,
        USER_BINARIZATION_THRESHOLD,
        TOP_K_PATTERNS,
        KNN_K,
    )
    # The variation is labelled with the coverage value rendered as text.
    variation = threads.RecommenderVariation(str(coverage), recommender)
    recommender_variations.append(variation)

In [None]:
# Run the benchmarks on MovieLens 100k
from evaluation.threads import grecond_knn_2_recommender_benchmark_thread

# Benchmark every recommender variation on the loaded folds. Arguments are positional, in order:
# folds, variations, an empty list (NOTE(review): its role is not visible here -- confirm against
# plot.benchmark), repetitions, relevance threshold, number of top recommendations and the
# imported benchmark thread.
results = plot.benchmark(
    folds,
    recommender_variations,
    [],
    REPEAT,
    RELEVANCE_THRESHOLD,
    NUMBER_OF_TOP_RECOMMENDATIONS,
    grecond_knn_2_recommender_benchmark_thread,
)

In [None]:
# Plot results
from evaluation.threads import GENERIC_METRIC_NAMES

# For each generic metric: draw its box plot first, then print the matching result table.
for metric_name in GENERIC_METRIC_NAMES:
    plot.plot_metric_box_plot(metric_name, results)
    result_table = plot.get_result_table(metric_name, results)
    print(result_table)

In [None]:
# Benchmark MovieLens 1M
#
# NOTE(review): `load_ml_1m_folds` below is only an alias for `load_ml_100k_folds`, so as written
# this cell calls the 100k loader even though it is labelled as the 1M benchmark. If
# dataset.movie_lens provides a dedicated 1M loader, import that instead -- TODO confirm.
from dataset.movie_lens import load_ml_100k_folds as load_ml_1m_folds
from recommenders import grecond_recommender
import evaluation.threads as threads
import evaluation.plot as plot
from evaluation.threads import grecond_knn_2_recommender_benchmark_thread, GENERIC_METRIC_NAMES


# Rebinds `folds`. Unlike the 100k cell, the loader is called here without arguments.
folds = load_ml_1m_folds()

# Coverage values swept by this experiment.
grecond_coverages = [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

# Build one variation per coverage value. Only the coverage changes between variations; every
# other constructor argument comes from the global parameters.
recommender_variations = []
for coverage in grecond_coverages:
    recommender = grecond_recommender.GreConDKNNRecommender2(
        coverage,
        DATASET_BINARIZATION_THRESHOLD,
        MINIMUM_PATTERN_BICLUSTER_SPARSITY,
        USER_BINARIZATION_THRESHOLD,
        TOP_K_PATTERNS,
        KNN_K,
    )
    # The variation is labelled with the coverage value rendered as text.
    variation = threads.RecommenderVariation(str(coverage), recommender)
    recommender_variations.append(variation)

# Rebinds `results` with the benchmark of this cell's variations on this cell's folds. Arguments
# are positional, in order: folds, variations, an empty list (NOTE(review): its role is not
# visible here -- confirm against plot.benchmark), repetitions, relevance threshold, number of top
# recommendations and the imported benchmark thread.
results = plot.benchmark(
    folds,
    recommender_variations,
    [],
    REPEAT,
    RELEVANCE_THRESHOLD,
    NUMBER_OF_TOP_RECOMMENDATIONS,
    grecond_knn_2_recommender_benchmark_thread,
)

# For each generic metric: draw its box plot first, then print the matching result table.
for metric_name in GENERIC_METRIC_NAMES:
    plot.plot_metric_box_plot(metric_name, results)
    result_table = plot.get_result_table(metric_name, results)
    print(result_table)