# Parameter Study of GreConDKNNRecommender2

This notebook tries to understand the relation between the parameters of
the GreConDKNNRecommender2 and its performance, and ultimately to suggest
the best parameters.

Copyright 2023 Bernardo C. Rodrigues

See COPYING file for license details

In [None]:
# Setup notebook: imports, autoreload, plot styling and RNG seeding.
import random
import numpy as np
import evaluation.plot as plot

# Load the autoreload extension
%load_ext autoreload

# Set autoreload to reload all modules every time a cell is executed
%autoreload 2

# Call the function to customize the default template
# (a helper provided by the project's evaluation.plot module).
plot.customize_default_template()

# Seed both Python's `random` module and NumPy's global RNG with the same
# fixed value.
seed = 0
random.seed(seed)
np.random.seed(seed)

In [None]:
# Load folds
# `folds` is passed to plot.benchmark in the first benchmark cell.
# NOTE(review): predefined=True presumably selects the dataset's shipped
# splits -- confirm in dataset.movie_lens.
from dataset.movie_lens import load_ml_100k_folds

folds = load_ml_100k_folds(predefined=True)

In [None]:
# Set parameters

# --- Recommender parameters -------------------------------------------------

# Binarization thresholds.
DATASET_BINARIZATION_THRESHOLD = 1.0
USER_BINARIZATION_THRESHOLD = 1.0

# GreConD coverage. The sweep cells in this notebook iterate over their own
# list of coverage values rather than reading this constant.
GRECOND_COVERAGE = 1.0

# Bicluster-related settings, all left as None for this study.
MINIMUM_BICLUSTER_SPARSITY = None
MINIMUM_BICLUSTER_COVERAGE = None
MINIMUM_BICLUSTER_RELATIVE_SIZE = None
NUMBER_OF_TOP_K_BICLUSTERS = None

# KNN settings.
KNN_TYPE = "item"
KNN_K = 10

# --- Evaluation parameters --------------------------------------------------

NUMBER_OF_TOP_RECOMMENDATIONS = 20
RELEVANCE_THRESHOLD = 4.0

# Since GreConDKnn2 is deterministic, we don't need to repeat the experiment.
REPEAT = 1

# Reference values noted for the bicluster settings above
# (presumably the recommender's own signature defaults -- TODO confirm):
#   minimum_bicluster_sparsity: Optional[float] = 0.08
#   minimum_bicluster_coverage: Optional[float] = 0.1
#   minimum_bicluster_relative_size: Optional[int] = 0.0003

In [None]:
# Build one recommender variation per GreConD coverage value.

from recommenders import grecond_recommender
import evaluation.threads as threads

# Coverage values swept in this study.
# Shorter alternative kept for reference: [0.01, 0.05, 0.1, 0.2, 0.3]
grecond_coverages = [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

# Keyword arguments shared by every variation; only the coverage changes
# from one variation to the next.
fixed_recommender_kwargs = dict(
    dataset_binarization_threshold=DATASET_BINARIZATION_THRESHOLD,
    minimum_bicluster_sparsity=MINIMUM_BICLUSTER_SPARSITY,
    minimum_bicluster_coverage=MINIMUM_BICLUSTER_COVERAGE,
    minimum_bicluster_relative_size=MINIMUM_BICLUSTER_RELATIVE_SIZE,
    knn_type=KNN_TYPE,
    user_binarization_threshold=USER_BINARIZATION_THRESHOLD,
    number_of_top_k_biclusters=NUMBER_OF_TOP_K_BICLUSTERS,
    knn_k=KNN_K,
)

# Each variation is labelled with the string form of its coverage value.
recommender_variations = [
    threads.RecommenderVariation(
        str(value),
        grecond_recommender.GreConDBiAKNNRecommender(
            grecond_coverage=value,
            **fixed_recommender_kwargs,
        ),
    )
    for value in grecond_coverages
]

In [None]:
# Run the benchmarks on MovieLens 100k
from evaluation.threads import grecond_biaknn_benchmark_thread

# Positional arguments for plot.benchmark, in the order the call expects.
benchmark_arguments = (
    folds,
    recommender_variations,
    [],  # passed through as an empty list; its meaning is defined by plot.benchmark
    REPEAT,
    RELEVANCE_THRESHOLD,
    NUMBER_OF_TOP_RECOMMENDATIONS,
    grecond_biaknn_benchmark_thread,
)

results = plot.benchmark(*benchmark_arguments)

In [None]:
# Plot results
from evaluation.threads import GRECOND_BIAKNN_METIC_NAMES

# For every metric: draw its box plot, then print the matching result table.
for metric in GRECOND_BIAKNN_METIC_NAMES:
    plot.plot_metric_box_plot(metric, results)
    result_table = plot.get_result_table(metric, results)
    print(result_table)

In [None]:
# Benchmark MovieLens 100k with the generic BiAKNN benchmark thread.
#
# This cell loads the 100k folds (it was previously labelled "1M" and imported
# the 1M loader, but always called load_ml_100k_folds), so the label and
# import now match what actually runs.
from dataset.movie_lens import load_ml_100k_folds
from recommenders import grecond_recommender
import evaluation.threads as threads
import evaluation.plot as plot
from evaluation.threads import (
    biaknn_benchmark_thread,
    GENERIC_METRIC_NAMES,
    BIAKNN_METRIC_NAMES,
)


folds = load_ml_100k_folds()

grecond_coverages = [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

# One variation per coverage value, labelled with the coverage as a string.
# Arguments are passed by keyword using the constants from the
# "Set parameters" cell. The previous positional call referenced
# MINIMUM_PATTERN_BICLUSTER_SPARSITY and TOP_K_PATTERNS, which are not defined
# anywhere in this notebook and raised NameError on a fresh kernel.
# NOTE(review): the old call also passed a trailing positional `True` whose
# parameter name is not visible in this notebook; re-add it by keyword if the
# recommender still exposes that option.
recommender_variations = [
    threads.RecommenderVariation(
        str(coverage),
        grecond_recommender.GreConDBiAKNNRecommender(
            grecond_coverage=coverage,
            dataset_binarization_threshold=DATASET_BINARIZATION_THRESHOLD,
            minimum_bicluster_sparsity=MINIMUM_BICLUSTER_SPARSITY,
            minimum_bicluster_coverage=MINIMUM_BICLUSTER_COVERAGE,
            minimum_bicluster_relative_size=MINIMUM_BICLUSTER_RELATIVE_SIZE,
            knn_type=KNN_TYPE,
            user_binarization_threshold=USER_BINARIZATION_THRESHOLD,
            number_of_top_k_biclusters=NUMBER_OF_TOP_K_BICLUSTERS,
            knn_k=KNN_K,
        ),
    )
    for coverage in grecond_coverages
]

results = plot.benchmark(
    folds,
    recommender_variations,
    [],
    REPEAT,
    RELEVANCE_THRESHOLD,
    NUMBER_OF_TOP_RECOMMENDATIONS,
    biaknn_benchmark_thread,
)

# For every metric: draw its box plot, then print the matching result table.
for metric_name in GENERIC_METRIC_NAMES + BIAKNN_METRIC_NAMES:
    plot.plot_metric_box_plot(metric_name, results)
    print(plot.get_result_table(metric_name, results))

In [None]:
# Benchmark MovieLens 1M with the generic BiAKNN benchmark thread.
from dataset.movie_lens import load_ml_1m_folds
from recommenders import grecond_recommender
import evaluation.threads as threads
import evaluation.plot as plot
from evaluation.threads import (
    biaknn_benchmark_thread,
    GENERIC_METRIC_NAMES,
    BIAKNN_METRIC_NAMES,
)


folds = load_ml_1m_folds()

grecond_coverages = [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

# One variation per coverage value, labelled with the coverage as a string.
# Arguments are passed by keyword using the constants from the
# "Set parameters" cell. The previous positional call referenced
# MINIMUM_PATTERN_BICLUSTER_SPARSITY and TOP_K_PATTERNS, which are not defined
# anywhere in this notebook and raised NameError on a fresh kernel.
# NOTE(review): the old call also passed a trailing positional `True` whose
# parameter name is not visible in this notebook; re-add it by keyword if the
# recommender still exposes that option.
recommender_variations = [
    threads.RecommenderVariation(
        str(coverage),
        grecond_recommender.GreConDBiAKNNRecommender(
            grecond_coverage=coverage,
            dataset_binarization_threshold=DATASET_BINARIZATION_THRESHOLD,
            minimum_bicluster_sparsity=MINIMUM_BICLUSTER_SPARSITY,
            minimum_bicluster_coverage=MINIMUM_BICLUSTER_COVERAGE,
            minimum_bicluster_relative_size=MINIMUM_BICLUSTER_RELATIVE_SIZE,
            knn_type=KNN_TYPE,
            user_binarization_threshold=USER_BINARIZATION_THRESHOLD,
            number_of_top_k_biclusters=NUMBER_OF_TOP_K_BICLUSTERS,
            knn_k=KNN_K,
        ),
    )
    for coverage in grecond_coverages
]

results = plot.benchmark(
    folds,
    recommender_variations,
    [],
    REPEAT,
    RELEVANCE_THRESHOLD,
    NUMBER_OF_TOP_RECOMMENDATIONS,
    biaknn_benchmark_thread,
)

# For every metric: draw its box plot, then print the matching result table.
for metric_name in GENERIC_METRIC_NAMES + BIAKNN_METRIC_NAMES:
    plot.plot_metric_box_plot(metric_name, results)
    print(plot.get_result_table(metric_name, results))