# Metaparameter Study of GreConDBiAKNNRecommender

This notebook studies how the parameters of the GreConDBiAKNNRecommender
relate to its performance and, ultimately, suggests the best values for
those parameters.

Copyright 2023 Bernardo C. Rodrigues

See COPYING file for license details

In [None]:
# Setup notebook

import random
import numpy as np
import evaluation.plot as plot

# Load the autoreload extension
%load_ext autoreload

# Set autoreload to reload all modules every time a cell is executed
%autoreload 2

# Call the function to customize the default template
plot.customize_default_template()

# Seed both Python's and NumPy's global random number generators with the
# same fixed value.
seed = 0
random.seed(seed)
np.random.seed(seed)

# Evaluation parameters
# These constants are passed to plot.benchmark() by the sweep cells below
# (as relevance_threshold, number_of_top_recommendations and repeats).
RELEVANCE_THRESHOLD = 4.0
NUMBER_OF_TOP_RECOMMENDATIONS = 20
REPEAT = 1 # The recommender is deterministic, so the experiment is not repeated. NOTE(review): this note originally named it "GreConDKnn2" - presumably GreConDBiAKNNRecommender; confirm.

def get_number_of_combinations(parameters_grid):
    """Print and return the number of parameter combinations in a grid.

    Args:
        parameters_grid: Mapping from a parameter name to a sized collection
            of candidate values for that parameter.

    Returns:
        The product of the number of candidate values of every parameter.
        An empty grid yields 1.
    """
    n_combinations = 1
    for parameter_values in parameters_grid.values():
        n_combinations *= len(parameter_values)
    print(f"Number of different combinations in the grid: {n_combinations}")
    # Hand the count back as well so callers can use it programmatically.
    return n_combinations

### Item based KNN

#### Randomized search

In [None]:
# Randomized search: sample parameter combinations from a wide grid for the
# item-based GreConDBiAKNNRecommender and score each one by RMSE on the folds
# returned by load_ml_100k_folds(predefined=True).
from surprise.model_selection import RandomizedSearchCV
from recommenders import grecond_recommender
from dataset.movie_lens import load_ml_100k_folds

data, k_fold = load_ml_100k_folds(predefined=True)

parameters_grid = {
    "grecond_coverage": [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    "dataset_binarization_threshold": [0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5],
    "minimum_bicluster_sparsity": [None, 0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1],
    "minimum_bicluster_coverage": [None, 0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1],
    "minimum_bicluster_relative_size": [None, 0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1],
    "user_binarization_threshold": [0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5],
    "number_of_top_k_biclusters": [None, 1, 10, 50, 100],
    "knn_k": [1, 5, 10, 20, 50, 100],
    "knn_type": ["item"],
}

search = RandomizedSearchCV(
    grecond_recommender.GreConDBiAKNNRecommender,
    parameters_grid,
    measures=["rmse"],
    cv=k_fold,
    n_jobs=-1,
    joblib_verbose=10,
    n_iter=2000,
    # Pin the sampler to the notebook seed (defined in the setup cell) so the
    # sampled combinations do not depend on how much of NumPy's global RNG
    # stream earlier cells have consumed.
    random_state=seed,
)

search.fit(data)

# Surface the outcome of the search; both attributes are keyed by measure name.
print(f"Best RMSE: {search.best_score['rmse']}")
print(f"Best parameters: {search.best_params['rmse']}")

Best parameters on randomized search:
```python
{
    "grecond_coverage": 1.0,
    "dataset_binarization_threshold": 1.0,
    "minimum_bicluster_sparsity": 1e-06,
    "minimum_bicluster_coverage": 1e-05,
    "minimum_bicluster_relative_size": None,
    "user_binarization_threshold": 1.0,
    "number_of_top_k_biclusters": 100,
    "knn_k": 50,
    "knn_type": "item",
}
```

#### Local grid search

In [None]:
# Local grid search: exhaustively evaluate a small grid centred on the best
# parameters found by the randomized search, scored by RMSE on the folds
# returned by load_ml_100k_folds(predefined=True).
from surprise.model_selection import GridSearchCV
from recommenders import grecond_recommender
from dataset.movie_lens import load_ml_100k_folds

data, k_fold = load_ml_100k_folds(predefined=True)

parameters_grid = {
    "grecond_coverage": [0.9, 1.0],
    "dataset_binarization_threshold": [1.0, 2.0],
    "minimum_bicluster_sparsity": [None, 1e-07, 1e-06],
    "minimum_bicluster_coverage": [None, 1e-06, 1e-05],
    "minimum_bicluster_relative_size": [None, 1e-8, 1e-7],
    "user_binarization_threshold": [1.0, 2.0],
    "number_of_top_k_biclusters": [None, 80, 100],
    "knn_k": [50],
    "knn_type": ["item"],
}

# Report how many fits the exhaustive search is about to run.
get_number_of_combinations(parameters_grid)

search = GridSearchCV(
    grecond_recommender.GreConDBiAKNNRecommender,
    parameters_grid,
    measures=["rmse"],
    cv=k_fold,
    n_jobs=-1,
    joblib_verbose=10,
)

search.fit(data)

# Surface the outcome of the search; both attributes are keyed by measure name.
print(f"Best RMSE: {search.best_score['rmse']}")
print(f"Best parameters: {search.best_params['rmse']}")

Best parameters on local grid search:
```python
{
    "grecond_coverage": 1.0,
    "dataset_binarization_threshold": 1.0,
    "minimum_bicluster_sparsity": None,
    "minimum_bicluster_coverage": None,
    "minimum_bicluster_relative_size": None,
    "user_binarization_threshold": 1.0,
    "number_of_top_k_biclusters": 100,
    "knn_k": 50,
    "knn_type": "item",
}
```

#### Sweeps around best parameters

##### grecond_coverage

In [None]:
# Sweep grecond_coverage while every other parameter is held at the best
# values found by the local grid search.
from dataset.movie_lens import load_ml_100k_folds, resolve_folds
from evaluation import threads
from evaluation.threads import GRECOND_BIAKNN_METIC_NAMES
from evaluation.threads import grecond_biaknn_benchmark_thread
from recommenders import grecond_recommender

data, k_fold = load_ml_100k_folds(predefined=True)

best_tight_params_item_M100K = dict(
    grecond_coverage=1.0,
    dataset_binarization_threshold=1.0,
    minimum_bicluster_sparsity=None,
    minimum_bicluster_coverage=None,
    minimum_bicluster_relative_size=None,
    user_binarization_threshold=1.0,
    number_of_top_k_biclusters=100,
    knn_k=50,
    knn_type="item",
)

folds = resolve_folds(data, k_fold)

grecond_coverages = [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

# One variation per swept value: start from the best parameters and override
# only grecond_coverage. Each variation is labelled with the swept value.
recommender_variations = [
    threads.RecommenderVariation(
        str(value),
        grecond_recommender.GreConDBiAKNNRecommender(
            **{**best_tight_params_item_M100K, "grecond_coverage": value}
        ),
    )
    for value in grecond_coverages
]

# Run the benchmarks on MovieLens 100k
results = plot.benchmark(
    folds=folds,
    parallel_recommender_variations=recommender_variations,
    sequential_recommender_variations=[],
    repeats=REPEAT,
    relevance_threshold=RELEVANCE_THRESHOLD,
    number_of_top_recommendations=NUMBER_OF_TOP_RECOMMENDATIONS,
    benchmark_thread=grecond_biaknn_benchmark_thread,
)

# Plot results: one box plot and one result table per metric
for metric_name in GRECOND_BIAKNN_METIC_NAMES:
    plot.plot_metric_box_plot(metric_name, results)
    print(plot.get_result_table(metric_name, results))

Best tradeoff grecond_coverage: 0.1

##### dataset_binarization_threshold

In [None]:
# Sweep dataset_binarization_threshold while every other parameter is held at
# the best values found by the local grid search.
from dataset.movie_lens import load_ml_100k_folds, resolve_folds
from evaluation import threads
from evaluation.threads import GRECOND_BIAKNN_METIC_NAMES
from evaluation.threads import grecond_biaknn_benchmark_thread
from recommenders import grecond_recommender

# Load the data in this cell too, so the sweep can be run on its own instead
# of relying on names left behind by a previously executed cell.
data, k_fold = load_ml_100k_folds(predefined=True)

best_tight_params_item_M100K = {
    "grecond_coverage": 1.0,
    "dataset_binarization_threshold": 1.0,
    "minimum_bicluster_sparsity": None,
    "minimum_bicluster_coverage": None,
    "minimum_bicluster_relative_size": None,
    "user_binarization_threshold": 1.0,
    "number_of_top_k_biclusters": 100,
    "knn_k": 50,
    "knn_type": "item",
}

folds = resolve_folds(data, k_fold)

dataset_binarization_thresholds = [1.0, 2.0, 3.0, 4.0, 5.0]

# One variation per swept value: start from the best parameters and override
# only dataset_binarization_threshold. Each variation is labelled with the
# swept value.
recommender_variations = [
    threads.RecommenderVariation(
        str(dataset_binarization_threshold),
        grecond_recommender.GreConDBiAKNNRecommender(
            **{
                **best_tight_params_item_M100K,
                "dataset_binarization_threshold": dataset_binarization_threshold,
            }
        ),
    )
    for dataset_binarization_threshold in dataset_binarization_thresholds
]

# Run the benchmarks on MovieLens 100k
results = plot.benchmark(
    folds=folds,
    parallel_recommender_variations=recommender_variations,
    sequential_recommender_variations=[],
    repeats=REPEAT,
    relevance_threshold=RELEVANCE_THRESHOLD,
    number_of_top_recommendations=NUMBER_OF_TOP_RECOMMENDATIONS,
    benchmark_thread=grecond_biaknn_benchmark_thread,
)

# Plot results: one box plot and one result table per metric
for metric_name in GRECOND_BIAKNN_METIC_NAMES:
    plot.plot_metric_box_plot(metric_name, results)
    print(plot.get_result_table(metric_name, results))

Best dataset_binarization_threshold: 1.0

##### user_binarization_threshold

In [None]:
# Sweep user_binarization_threshold while every other parameter is held at
# the best values found by the local grid search.
from dataset.movie_lens import load_ml_100k_folds, resolve_folds
from evaluation import threads
from evaluation.threads import GRECOND_BIAKNN_METIC_NAMES
from evaluation.threads import grecond_biaknn_benchmark_thread
from recommenders import grecond_recommender

# Load the data in this cell too, so the sweep can be run on its own instead
# of relying on names left behind by a previously executed cell.
data, k_fold = load_ml_100k_folds(predefined=True)

best_tight_params_item_M100K = {
    "grecond_coverage": 1.0,
    "dataset_binarization_threshold": 1.0,
    "minimum_bicluster_sparsity": None,
    "minimum_bicluster_coverage": None,
    "minimum_bicluster_relative_size": None,
    "user_binarization_threshold": 1.0,
    "number_of_top_k_biclusters": 100,
    "knn_k": 50,
    "knn_type": "item",
}

folds = resolve_folds(data, k_fold)

user_binarization_thresholds = [1.0, 2.0, 3.0, 4.0, 5.0]

# One variation per swept value: start from the best parameters and override
# only user_binarization_threshold. Each variation is labelled with the swept
# value.
recommender_variations = [
    threads.RecommenderVariation(
        str(user_binarization_threshold),
        grecond_recommender.GreConDBiAKNNRecommender(
            **{
                **best_tight_params_item_M100K,
                "user_binarization_threshold": user_binarization_threshold,
            }
        ),
    )
    for user_binarization_threshold in user_binarization_thresholds
]

# Run the benchmarks on MovieLens 100k
results = plot.benchmark(
    folds=folds,
    parallel_recommender_variations=recommender_variations,
    sequential_recommender_variations=[],
    repeats=REPEAT,
    relevance_threshold=RELEVANCE_THRESHOLD,
    number_of_top_recommendations=NUMBER_OF_TOP_RECOMMENDATIONS,
    benchmark_thread=grecond_biaknn_benchmark_thread,
)

# Plot results: one box plot and one result table per metric
for metric_name in GRECOND_BIAKNN_METIC_NAMES:
    plot.plot_metric_box_plot(metric_name, results)
    print(plot.get_result_table(metric_name, results))

Best user_binarization_threshold: 1.0

##### knn_k

In [None]:
# Sweep knn_k while every other parameter is held at the best values found by
# the local grid search.
from dataset.movie_lens import load_ml_100k_folds, resolve_folds
from evaluation import threads
from evaluation.threads import GRECOND_BIAKNN_METIC_NAMES
from evaluation.threads import grecond_biaknn_benchmark_thread
from recommenders import grecond_recommender

# Load the data in this cell too, so the sweep can be run on its own instead
# of relying on names left behind by a previously executed cell.
data, k_fold = load_ml_100k_folds(predefined=True)

best_tight_params_item_M100K = {
    "grecond_coverage": 1.0,
    "dataset_binarization_threshold": 1.0,
    "minimum_bicluster_sparsity": None,
    "minimum_bicluster_coverage": None,
    "minimum_bicluster_relative_size": None,
    "user_binarization_threshold": 1.0,
    "number_of_top_k_biclusters": 100,
    "knn_k": 50,
    "knn_type": "item",
}

folds = resolve_folds(data, k_fold)

knn_ks = [1, 5, 10, 20, 50, 100, 200]

# One variation per swept value: start from the best parameters and override
# only knn_k. Each variation is labelled with the swept value.
recommender_variations = [
    threads.RecommenderVariation(
        str(knn_k),
        grecond_recommender.GreConDBiAKNNRecommender(
            **{**best_tight_params_item_M100K, "knn_k": knn_k}
        ),
    )
    for knn_k in knn_ks
]

# Run the benchmarks on MovieLens 100k
results = plot.benchmark(
    folds=folds,
    parallel_recommender_variations=recommender_variations,
    sequential_recommender_variations=[],
    repeats=REPEAT,
    relevance_threshold=RELEVANCE_THRESHOLD,
    number_of_top_recommendations=NUMBER_OF_TOP_RECOMMENDATIONS,
    benchmark_thread=grecond_biaknn_benchmark_thread,
)

# Plot results: one box plot and one result table per metric
for metric_name in GRECOND_BIAKNN_METIC_NAMES:
    plot.plot_metric_box_plot(metric_name, results)
    print(plot.get_result_table(metric_name, results))

Best knn_k: 50

##### number_of_top_k_biclusters

In [None]:
# Sweep number_of_top_k_biclusters while every other parameter is held at the
# best values found by the local grid search.
from dataset.movie_lens import load_ml_100k_folds, resolve_folds
from evaluation import threads
from evaluation.threads import GRECOND_BIAKNN_METIC_NAMES
from evaluation.threads import grecond_biaknn_benchmark_thread
from recommenders import grecond_recommender

data, k_fold = load_ml_100k_folds(predefined=True)

best_tight_params_item_M100K = dict(
    grecond_coverage=1.0,
    dataset_binarization_threshold=1.0,
    minimum_bicluster_sparsity=None,
    minimum_bicluster_coverage=None,
    minimum_bicluster_relative_size=None,
    user_binarization_threshold=1.0,
    number_of_top_k_biclusters=100,
    knn_k=50,
    knn_type="item",
)

folds = resolve_folds(data, k_fold)

numbers_of_top_k_biclusters = [None, 1, 5, 10, 50, 100, 200]

# One variation per swept value: start from the best parameters and override
# only number_of_top_k_biclusters. Each variation is labelled with the swept
# value.
recommender_variations = [
    threads.RecommenderVariation(
        str(value),
        grecond_recommender.GreConDBiAKNNRecommender(
            **{**best_tight_params_item_M100K, "number_of_top_k_biclusters": value}
        ),
    )
    for value in numbers_of_top_k_biclusters
]

# Run the benchmarks on MovieLens 100k
results = plot.benchmark(
    folds=folds,
    parallel_recommender_variations=recommender_variations,
    sequential_recommender_variations=[],
    repeats=REPEAT,
    relevance_threshold=RELEVANCE_THRESHOLD,
    number_of_top_recommendations=NUMBER_OF_TOP_RECOMMENDATIONS,
    benchmark_thread=grecond_biaknn_benchmark_thread,
)

# Plot results: one box plot and one result table per metric
for metric_name in GRECOND_BIAKNN_METIC_NAMES:
    plot.plot_metric_box_plot(metric_name, results)
    print(plot.get_result_table(metric_name, results))

Best number_of_top_k_biclusters: 100

#### Best parameters

```python
{
    "grecond_coverage": 0.1,
    "dataset_binarization_threshold": 1.0,
    "minimum_bicluster_sparsity": None,
    "minimum_bicluster_coverage": None,
    "minimum_bicluster_relative_size": None,
    "user_binarization_threshold": 1.0,
    "number_of_top_k_biclusters": 100,
    "knn_k": 50,
    "knn_type": "item",
 }
```