# Hyperparameter Search for GreConD UB-PAKNN Recommender

This notebook finds the best hyperparameters for GreConD UB-PAKNN Recommender.

Copyright 2023 Bernardo C. Rodrigues

See COPYING file for license details

In [None]:
# Setup notebook

import random
import numpy as np
import evaluation.plot as plot

# Load the autoreload extension
%load_ext autoreload

# Set autoreload to reload all modules every time a cell is executed
%autoreload 2

# Apply the project's default plot template customisation (evaluation.plot)
# before any figure is produced.
plot.customize_default_template()

# Seed both the stdlib and the NumPy global RNGs with the same fixed value.
seed = 0
random.seed(seed)
np.random.seed(seed)

# Evaluation parameters passed to every plot.benchmark call in this notebook.
RELEVANCE_THRESHOLD = 4.0
NUMBER_OF_TOP_RECOMMENDATIONS = 20
REPEAT = 1 # The recommender is treated as deterministic, so the experiment is not repeated.
# NOTE(review): the original remark named the class "GreConDKnn2", while the cells
# below instantiate GreConDBiAKNNRecommender -- confirm the determinism claim holds for it.

def get_number_of_combinations(parameters_grid):
    """Print how many distinct parameter combinations a grid spans.

    The count is the product of the number of candidate values listed for
    each parameter (an empty grid therefore yields 1).

    Args:
        parameters_grid: Mapping from parameter name to a sized collection
            of candidate values.

    Returns:
        None. The count is only written to standard output.
    """
    candidate_counts = [len(candidates) for candidates in parameters_grid.values()]

    total = 1
    for count in candidate_counts:
        total = total * count

    print(f"Number of different combinations in the grid: {total}")

#### Randomized search

In [None]:
from surprise.model_selection import RandomizedSearchCV
from recommenders import grecond_recommender
from dataset.movie_lens import load_ml_100k_folds

# Load the dataset and the fold object that is handed to the search as `cv`.
data, k_fold = load_ml_100k_folds(predefined=True)

# Candidate values for each constructor argument of the recommender.
# The full grid has 11 * 5 * 8 * 8 * 8 * 5 * 5 * 6 * 1 = 4,224,000 combinations,
# which is why only a random sample of it is evaluated below.
# NOTE(review): None presumably disables the corresponding filter (the
# narration below talks about "disabling" these filters) -- confirm in the
# recommender implementation.
parameters_grid = {
    "grecond_coverage": [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    "dataset_binarization_threshold": [1.0, 2.0, 3.0, 4.0, 5.0],
    "minimum_bicluster_sparsity": [None, 0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1],
    "minimum_bicluster_coverage": [None, 0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1],
    "minimum_bicluster_relative_size": [None, 0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1],
    "user_binarization_threshold": [1.0, 2.0, 3.0, 4.0, 5.0],
    "number_of_top_k_biclusters": [None, 1, 10, 50, 100],
    "knn_k": [1, 5, 10, 20, 50, 100],
    "knn_type": ["user"],
}

get_number_of_combinations(parameters_grid)

# NOTE(review): no random_state is passed, so the sampled combinations depend
# on the state of NumPy's global RNG (seeded in the setup cell) at the time
# this cell runs.
search = RandomizedSearchCV(
    algo_class=grecond_recommender.GreConDBiAKNNRecommender,
    param_distributions=parameters_grid,
    n_iter=2000,  # the trailing comma was missing, which made this call a SyntaxError
    measures=["rmse"],
    cv=k_fold,
    n_jobs=-1,
    joblib_verbose=10,
)

search.fit(data)

Best parameters on randomized search:
```python
{
    'grecond_coverage': 0.9,
    'dataset_binarization_threshold': 2.0,
    'minimum_bicluster_sparsity': 0.01,
    'minimum_bicluster_coverage': 0.0001,
    'minimum_bicluster_relative_size': None,
    'user_binarization_threshold': 2.0,
    'number_of_top_k_biclusters': 10,
    'knn_k': 50,
    'knn_type': 'user'
 }
```

Now we will perform a grid search around these parameters to refine the
results. In addition, given our previous experience, we will also
include the option of disabling the `minimum_bicluster_sparsity`,
`minimum_bicluster_coverage`, `minimum_bicluster_relative_size` and
`number_of_top_k_biclusters` filters.
Disabling these filters tends to improve the performance of the
recommender. We will perform sweeps on `grecond_coverage` and `knn_k`
afterwards.

#### Local grid search

In [None]:
from surprise.model_selection import GridSearchCV
from recommenders import grecond_recommender
from dataset.movie_lens import load_ml_100k_folds

# Load the dataset and the fold object that is handed to the search as `cv`.
data, k_fold = load_ml_100k_folds(predefined=True)

# Neighbourhood of the best randomized-search result. grecond_coverage, knn_k
# and knn_type are pinned to a single value; every filter also lists None.
# Grid size: 1 * 3 * 4 * 4 * 3 * 3 * 4 * 1 * 1 = 1728 combinations.
parameters_grid = dict(
    grecond_coverage=[0.9],
    dataset_binarization_threshold=[1.0, 2.0, 3.0],
    minimum_bicluster_sparsity=[None, 1e-3, 1e-2, 1e-1],
    minimum_bicluster_coverage=[None, 1e-5, 1e-4, 1e-3],
    minimum_bicluster_relative_size=[None, 1e-7, 1e-6],
    user_binarization_threshold=[1.0, 2.0, 3.0],
    number_of_top_k_biclusters=[None, 5, 10, 15],
    knn_k=[50],
    knn_type=["user"],
)

get_number_of_combinations(parameters_grid)

# Exhaustive search over the grid above, scored by RMSE on the loaded folds.
search = GridSearchCV(
    algo_class=grecond_recommender.GreConDBiAKNNRecommender,
    param_grid=parameters_grid,
    measures=["rmse"],
    cv=k_fold,
    n_jobs=-1,
    joblib_verbose=10,
)

search.fit(data)

Best parameters on local grid search:
```python
{
    'grecond_coverage': 0.9,
    'dataset_binarization_threshold': 1.0,
    'minimum_bicluster_sparsity': None,
    'minimum_bicluster_coverage': None,
    'minimum_bicluster_relative_size': None,
    'user_binarization_threshold': 2.0,
    'number_of_top_k_biclusters': 10,
    'knn_k': 50,
    'knn_type': 'user'
 }
```

Now we will sweep individual parameters around these values to refine the
results. Perhaps we can find a better trade-off between performance and
fit time. We sweep one parameter at a time, starting with grecond_coverage.

#### Sweeps around best parameters

##### grecond_coverage

In [None]:
from evaluation import threads
from recommenders import grecond_recommender
from dataset.movie_lens import load_ml_100k_folds
from dataset.movie_lens import resolve_folds

# Reload data and folds inside this cell so it does not rely on the search cells.
data, k_fold = load_ml_100k_folds(predefined=True)

# Baseline configuration: the best parameters reported by the local grid search.
best_params_user = dict(
    grecond_coverage=0.9,
    dataset_binarization_threshold=1.0,
    minimum_bicluster_sparsity=None,
    minimum_bicluster_coverage=None,
    minimum_bicluster_relative_size=None,
    user_binarization_threshold=2.0,
    number_of_top_k_biclusters=10,
    knn_k=50,
    knn_type="user",
)

folds = resolve_folds(data, k_fold)

grecond_coverages = [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

# One variation per candidate, labelled with the candidate value. Every
# constructor argument comes from the baseline except grecond_coverage,
# which is overridden with the swept value.
recommender_variations = [
    threads.RecommenderVariation(
        str(candidate),
        grecond_recommender.GreConDBiAKNNRecommender(
            **dict(best_params_user, grecond_coverage=candidate)
        ),
    )
    for candidate in grecond_coverages
]

from evaluation.threads import grecond_biaknn_benchmark_thread

# Benchmark every variation on the resolved folds with the shared evaluation settings.
results = plot.benchmark(
    folds,
    recommender_variations,
    [],
    repeats=REPEAT,
    relevance_threshold=RELEVANCE_THRESHOLD,
    number_of_top_recommendations=NUMBER_OF_TOP_RECOMMENDATIONS,
    benchmark_thread=grecond_biaknn_benchmark_thread,
)


In [None]:
from evaluation.threads import GRECOND_BIAKNN_METIC_NAMES

# For each metric name listed in GRECOND_BIAKNN_METIC_NAMES, draw the box plot
# for the current `results` and print the matching result table.
for metric_name in GRECOND_BIAKNN_METIC_NAMES:
    plot.plot_metric_box_plot(metric_name, results)
    print(plot.get_result_table(metric_name, results))

In [None]:
from evaluation.threads import GRECOND_BIAKNN_METIC_NAMES

# NOTE(review): this loop is a verbatim repeat of the previous plotting cell; it
# re-renders the same `results` (box plot plus printed table per metric).
# Consider removing one of the two cells.
for metric_name in GRECOND_BIAKNN_METIC_NAMES:
    plot.plot_metric_box_plot(metric_name, results)
    print(plot.get_result_table(metric_name, results))

Best tradeoff grecond_coverage: 0.2

##### dataset_binarization_threshold

In [None]:
from evaluation import threads
from recommenders import grecond_recommender
from dataset.movie_lens import load_ml_100k_folds
from dataset.movie_lens import resolve_folds

# Reload data and folds inside this cell so it does not rely on earlier cells.
data, k_fold = load_ml_100k_folds(predefined=True)

# Baseline configuration: the best parameters reported by the local grid search.
best_params_user = dict(
    grecond_coverage=0.9,
    dataset_binarization_threshold=1.0,
    minimum_bicluster_sparsity=None,
    minimum_bicluster_coverage=None,
    minimum_bicluster_relative_size=None,
    user_binarization_threshold=2.0,
    number_of_top_k_biclusters=10,
    knn_k=50,
    knn_type="user",
)

folds = resolve_folds(data, k_fold)

binarization_thresholds = [1.0, 2.0, 3.0, 4.0, 5.0]

# One variation per candidate, labelled with the candidate value. Only
# dataset_binarization_threshold deviates from the baseline.
recommender_variations = [
    threads.RecommenderVariation(
        str(threshold),
        grecond_recommender.GreConDBiAKNNRecommender(
            **dict(best_params_user, dataset_binarization_threshold=threshold)
        ),
    )
    for threshold in binarization_thresholds
]

from evaluation.threads import grecond_biaknn_benchmark_thread

# Benchmark every variation on the resolved folds with the shared evaluation settings.
results = plot.benchmark(
    folds,
    recommender_variations,
    [],
    REPEAT,
    RELEVANCE_THRESHOLD,
    NUMBER_OF_TOP_RECOMMENDATIONS,
    grecond_biaknn_benchmark_thread,
)



In [None]:
from evaluation.threads import GRECOND_BIAKNN_METIC_NAMES

# For each metric name listed in GRECOND_BIAKNN_METIC_NAMES, draw the box plot
# for the current `results` and print the matching result table.
for metric_name in GRECOND_BIAKNN_METIC_NAMES:
    plot.plot_metric_box_plot(metric_name, results)
    print(plot.get_result_table(metric_name, results))

Best dataset_binarization_threshold: 1.0

##### user_binarization_threshold

In [None]:
from evaluation import threads
from recommenders import grecond_recommender
from dataset.movie_lens import load_ml_100k_folds
from dataset.movie_lens import resolve_folds

# Reload data and folds inside this cell so it does not rely on earlier cells.
data, k_fold = load_ml_100k_folds(predefined=True)

# Baseline configuration: the best parameters reported by the local grid search.
best_params_user = dict(
    grecond_coverage=0.9,
    dataset_binarization_threshold=1.0,
    minimum_bicluster_sparsity=None,
    minimum_bicluster_coverage=None,
    minimum_bicluster_relative_size=None,
    user_binarization_threshold=2.0,
    number_of_top_k_biclusters=10,
    knn_k=50,
    knn_type="user",
)

folds = resolve_folds(data, k_fold)

binarization_thresholds = [1.0, 2.0, 3.0, 4.0, 5.0]

# One variation per candidate, labelled with the candidate value. Only
# user_binarization_threshold deviates from the baseline.
recommender_variations = [
    threads.RecommenderVariation(
        str(threshold),
        grecond_recommender.GreConDBiAKNNRecommender(
            **dict(best_params_user, user_binarization_threshold=threshold)
        ),
    )
    for threshold in binarization_thresholds
]

from evaluation.threads import grecond_biaknn_benchmark_thread

# Benchmark every variation on the resolved folds with the shared evaluation settings.
results = plot.benchmark(
    folds,
    recommender_variations,
    [],
    REPEAT,
    RELEVANCE_THRESHOLD,
    NUMBER_OF_TOP_RECOMMENDATIONS,
    grecond_biaknn_benchmark_thread,
)

from evaluation.threads import GRECOND_BIAKNN_METIC_NAMES

# Box plot followed by the printed result table, once per metric name.
for metric_name in GRECOND_BIAKNN_METIC_NAMES:
    plot.plot_metric_box_plot(metric_name, results)
    print(plot.get_result_table(metric_name, results))

Best user_binarization_threshold: 4.0

##### knn_k

In [None]:
from evaluation import threads
from recommenders import grecond_recommender
from dataset.movie_lens import load_ml_100k_folds
from dataset.movie_lens import resolve_folds

# Reload data and folds inside this cell so it does not rely on earlier cells.
data, k_fold = load_ml_100k_folds(predefined=True)

# Baseline configuration: the best parameters reported by the local grid search.
best_params_user = dict(
    grecond_coverage=0.9,
    dataset_binarization_threshold=1.0,
    minimum_bicluster_sparsity=None,
    minimum_bicluster_coverage=None,
    minimum_bicluster_relative_size=None,
    user_binarization_threshold=2.0,
    number_of_top_k_biclusters=10,
    knn_k=50,
    knn_type="user",
)

folds = resolve_folds(data, k_fold)

knn_ks = [1, 5, 10, 20, 50, 100, 200]

# One variation per candidate, labelled with the candidate value. Only knn_k
# deviates from the baseline.
recommender_variations = [
    threads.RecommenderVariation(
        str(neighbours),
        grecond_recommender.GreConDBiAKNNRecommender(
            **dict(best_params_user, knn_k=neighbours)
        ),
    )
    for neighbours in knn_ks
]

from evaluation.threads import grecond_biaknn_benchmark_thread

# Benchmark every variation on the resolved folds with the shared evaluation settings.
results = plot.benchmark(
    folds,
    recommender_variations,
    [],
    REPEAT,
    RELEVANCE_THRESHOLD,
    NUMBER_OF_TOP_RECOMMENDATIONS,
    grecond_biaknn_benchmark_thread,
)

from evaluation.threads import GRECOND_BIAKNN_METIC_NAMES

# Box plot followed by the printed result table, once per metric name.
for metric_name in GRECOND_BIAKNN_METIC_NAMES:
    plot.plot_metric_box_plot(metric_name, results)
    print(plot.get_result_table(metric_name, results))

Best knn_k: 50

##### number_of_top_k_biclusters

In [None]:
from evaluation import threads
from recommenders import grecond_recommender
from dataset.movie_lens import load_ml_100k_folds
from dataset.movie_lens import resolve_folds

# Reload data and folds inside this cell so it does not rely on earlier cells.
data, k_fold = load_ml_100k_folds(predefined=True)

# Baseline configuration: the best parameters reported by the local grid search.
best_params_user = dict(
    grecond_coverage=0.9,
    dataset_binarization_threshold=1.0,
    minimum_bicluster_sparsity=None,
    minimum_bicluster_coverage=None,
    minimum_bicluster_relative_size=None,
    user_binarization_threshold=2.0,
    number_of_top_k_biclusters=10,
    knn_k=50,
    knn_type="user",
)

folds = resolve_folds(data, k_fold)

numbers_of_top_k_biclusters = [None, 1, 2, 4, 6, 8, 10]

# One variation per candidate, labelled with the candidate value ("None" for
# the None entry). Only number_of_top_k_biclusters deviates from the baseline.
recommender_variations = [
    threads.RecommenderVariation(
        str(top_k),
        grecond_recommender.GreConDBiAKNNRecommender(
            **dict(best_params_user, number_of_top_k_biclusters=top_k)
        ),
    )
    for top_k in numbers_of_top_k_biclusters
]

from evaluation.threads import grecond_biaknn_benchmark_thread

# Benchmark every variation on the resolved folds with the shared evaluation settings.
results = plot.benchmark(
    folds,
    recommender_variations,
    [],
    REPEAT,
    RELEVANCE_THRESHOLD,
    NUMBER_OF_TOP_RECOMMENDATIONS,
    grecond_biaknn_benchmark_thread,
)

from evaluation.threads import GRECOND_BIAKNN_METIC_NAMES

# Box plot followed by the printed result table, once per metric name.
for metric_name in GRECOND_BIAKNN_METIC_NAMES:
    plot.plot_metric_box_plot(metric_name, results)
    print(plot.get_result_table(metric_name, results))

In [None]:
from evaluation.threads import GRECOND_BIAKNN_METIC_NAMES

# NOTE(review): this loop repeats the plotting loop at the end of the previous
# cell verbatim; it re-renders the same `results` (box plot plus printed table
# per metric). Consider removing one of the two.
for metric_name in GRECOND_BIAKNN_METIC_NAMES:
    plot.plot_metric_box_plot(metric_name, results)
    print(plot.get_result_table(metric_name, results))

Best number_of_top_k_biclusters: 5

#### Best parameters

```python
{
    'grecond_coverage': 0.2,
    'dataset_binarization_threshold': 2.0,
    'minimum_bicluster_sparsity': None,
    'minimum_bicluster_coverage': None,
    'minimum_bicluster_relative_size': None,
    'user_binarization_threshold': 4.0,
    'number_of_top_k_biclusters': 5,
    'knn_k': 50,
    'knn_type': 'user'
 }
```