# Metaparameter Study of GreConDBiAKNNRecommender

This notebook studies how the parameters of the GreConDBiAKNNRecommender
relate to its performance and, ultimately, suggests the best values for
those parameters.

Copyright 2023 Bernardo C. Rodrigues

See COPYING file for license details

In [None]:
# Setup notebook

import random
import numpy as np
import evaluation.plot as plot

# Load the autoreload extension
%load_ext autoreload

# Set autoreload to reload all modules every time a cell is executed
%autoreload 2

# Call the function to customize the default template
plot.customize_default_template()

# Fixed seed applied to both the stdlib and the NumPy global random generators.
seed = 0
random.seed(seed)
np.random.seed(seed)

# Evaluation parameters (forwarded to plot.benchmark by the sweep cells below)
RELEVANCE_THRESHOLD = 4.0
NUMBER_OF_TOP_RECOMMENDATIONS = 20
# Number of benchmark repetitions. A single run is used because the recommender
# is deterministic. NOTE(review): the original remark names it "GreConDKnn2",
# presumably an earlier name of GreConDBiAKNNRecommender - TODO confirm.
REPEAT = 1

def get_number_of_combinations(parameters_grid):
    """Print and return how many parameter combinations a grid contains.

    Args:
        parameters_grid: Mapping from parameter name to a sized collection
            (e.g. a list) holding the candidate values of that parameter.

    Returns:
        int: The product of the number of candidates of every parameter.
        An empty grid yields 1; a parameter without candidates yields 0.
    """
    n_combinations = 1
    for parameter_values in parameters_grid.values():
        n_combinations *= len(parameter_values)
    print(f"Number of different combinations in the grid: {n_combinations}")
    # Returned in addition to being printed so that the count can be reused by
    # the caller; existing call sites that ignore the result are unaffected.
    return n_combinations

#### Randomized search

In [None]:
from surprise.model_selection import RandomizedSearchCV
from recommenders import grecond_recommender

from dataset.movie_lens import load_ml_1m_folds

data, k_fold = load_ml_1m_folds()

# Candidate values for each GreConDBiAKNNRecommender constructor parameter.
# The lists below span 64,800 combinations, so they are sampled instead of
# being evaluated exhaustively.
parameters_grid = {
    "grecond_coverage": [0.01, 0.2, 0.4, 0.6, 0.8, 1.0],
    "dataset_binarization_threshold": [1.0, 3.0, 5.0],
    "minimum_bicluster_sparsity": [None, 1e-07, 1e-05, 1e-03, 1e-01],
    "minimum_bicluster_coverage": [None, 1e-07, 1e-05, 1e-03, 1e-01],
    "minimum_bicluster_relative_size": [1e-07, 1e-05, 1e-03, 1e-01],
    "user_binarization_threshold": [1.0, 3.0, 5.0],
    "number_of_top_k_biclusters": [None, 1, 10, 100],
    "knn_k": [1, 10, 100],
    "knn_type": ["item"],
}

get_number_of_combinations(parameters_grid)

# `random_state` pins the candidate sampling to the notebook seed. Without it
# the sampler falls back to NumPy's global generator, so re-running this cell
# on its own would cross-validate a different set of combinations each time.
search = RandomizedSearchCV(
    grecond_recommender.GreConDBiAKNNRecommender,
    parameters_grid,
    n_iter=100,  # number of sampled combinations
    measures=["rmse"],
    cv=k_fold,
    n_jobs=-2,  # joblib convention: all CPUs but one
    joblib_verbose=10,
    random_state=seed,
)

search.fit(data)

# Parameter combination with the best average RMSE over the folds.
print(search.best_params['rmse'])

Best parameters on randomized search:
```python
{
    'grecond_coverage': 0.6, 
    'dataset_binarization_threshold': 3.0, 
    'minimum_bicluster_sparsity': 0.001, 
    'minimum_bicluster_coverage': None, 
    'minimum_bicluster_relative_size': 1e-07, 
    'user_binarization_threshold': 3.0, 
    'number_of_top_k_biclusters': None, 
    'knn_k': 100, 
    'knn_type': 'item'
}
```

#### Local grid search

In [None]:
from surprise.model_selection import GridSearchCV
from recommenders import grecond_recommender
from dataset.movie_lens import load_ml_1m_folds

data, k_fold = load_ml_1m_folds()

# Small grid (64 combinations) built around the best combination reported by
# the randomized search: every list holds that best value plus at most one
# alternative, and the parameters with a single entry are kept fixed.
parameters_grid = {
    "grecond_coverage": [0.6],
    "dataset_binarization_threshold": [3.0, 4.0],
    "minimum_bicluster_sparsity": [None, 1e-03],
    "minimum_bicluster_coverage": [None, 1e-07],
    "minimum_bicluster_relative_size": [None, 1e-07],
    "user_binarization_threshold": [3.0, 4.0],
    "number_of_top_k_biclusters": [None, 10],
    "knn_k": [100],
    "knn_type": ["item"],
}

get_number_of_combinations(parameters_grid)

# Exhaustive search: every combination of the grid is cross-validated on the
# folds produced by `k_fold`, using all available CPUs (n_jobs=-1).
search = GridSearchCV(
    algo_class=grecond_recommender.GreConDBiAKNNRecommender,
    param_grid=parameters_grid,
    measures=["rmse"],
    cv=k_fold,
    n_jobs=-1,
    joblib_verbose=10,
)

search.fit(data)

# Parameter combination with the best average RMSE over the folds.
print(search.best_params['rmse'])

Best parameters on local grid search:
```python
{
    'grecond_coverage': 0.6,
    'dataset_binarization_threshold': 3.0,
    'minimum_bicluster_sparsity': None,
    'minimum_bicluster_coverage': None,
    'minimum_bicluster_relative_size': None,
    'user_binarization_threshold': 3.0,
    'number_of_top_k_biclusters': None,
    'knn_k': 100,
    'knn_type': 'item'
}
```

#### Sweeps around best parameters

##### grecond_coverage

In [None]:
import json
from evaluation import threads
from recommenders import grecond_recommender
from dataset.movie_lens import load_ml_1m_folds
from dataset.movie_lens import resolve_folds

data, k_fold = load_ml_1m_folds()
folds = resolve_folds(data, k_fold)

# Reference configuration. Every variation below reuses it unchanged except
# for `grecond_coverage`, which is the parameter being swept.
best_params_user = {
    'grecond_coverage': 0.6,
    'dataset_binarization_threshold': 3.0,
    'minimum_bicluster_sparsity': None,
    'minimum_bicluster_coverage': None,
    'minimum_bicluster_relative_size': None,
    'user_binarization_threshold': 3.0,
    'number_of_top_k_biclusters': None,
    'knn_k': 100,
    'knn_type': 'item'
}

grecond_coverages = [0.01, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0]

# One labelled recommender per swept value. The reference configuration is
# unpacked as keyword arguments with the swept parameter overridden, and the
# label is the string form of the swept value.
recommender_variations = [
    threads.RecommenderVariation(
        str(coverage),
        grecond_recommender.GreConDBiAKNNRecommender(
            **{**best_params_user, "grecond_coverage": coverage}
        ),
    )
    for coverage in grecond_coverages
]

from evaluation.threads import grecond_biaknn_benchmark_thread

results = plot.benchmark(
    folds,
    recommender_variations,
    [],
    repeats=REPEAT,
    relevance_threshold=RELEVANCE_THRESHOLD,
    number_of_top_recommendations=NUMBER_OF_TOP_RECOMMENDATIONS,
    benchmark_thread=grecond_biaknn_benchmark_thread,
    thread_count=8
)

# Persist the results as JSON; the next cell reads this file back.
with open('GreConD_IBPAKNN_sweep_grecond_coverage.results', 'w') as file_object:
    json.dump(results, file_object, indent=4)

In [None]:
import json
from evaluation.threads import GRECOND_BIAKNN_METIC_NAMES

# Read back the JSON results written by the grecond_coverage sweep cell.
with open('GreConD_IBPAKNN_sweep_grecond_coverage.results', 'r') as file_object:
    results = json.load(file_object)

# Print one result table per metric, preceded by the metric name and followed
# by a blank line.
for metric_name in GRECOND_BIAKNN_METIC_NAMES:
    print(metric_name)
    print(plot.get_result_table(metric_name, results))
    print()

In [None]:
import json
from evaluation import threads
from recommenders import grecond_recommender
from dataset.movie_lens import load_ml_1m_folds
from dataset.movie_lens import resolve_folds

data, k_fold = load_ml_1m_folds()
folds = resolve_folds(data, k_fold)

# Reference configuration. Every variation below reuses it unchanged except
# for `dataset_binarization_threshold`, which is the parameter being swept.
best_params_user = {
    'grecond_coverage': 0.1,
    'dataset_binarization_threshold': 3.0,
    'minimum_bicluster_sparsity': None,
    'minimum_bicluster_coverage': None,
    'minimum_bicluster_relative_size': None,
    'user_binarization_threshold': 3.0,
    'number_of_top_k_biclusters': None,
    'knn_k': 100,
    'knn_type': 'item'
}

dataset_binarization_thresholds = [1.0, 2.0, 3.0, 4.0, 5.0]

# One labelled recommender per swept value. The reference configuration is
# unpacked as keyword arguments with the swept parameter overridden, and the
# label is the string form of the swept value.
recommender_variations = [
    threads.RecommenderVariation(
        str(dataset_binarization_threshold),
        grecond_recommender.GreConDBiAKNNRecommender(
            **{
                **best_params_user,
                "dataset_binarization_threshold": dataset_binarization_threshold,
            }
        ),
    )
    for dataset_binarization_threshold in dataset_binarization_thresholds
]

from evaluation.threads import grecond_biaknn_benchmark_thread

results = plot.benchmark(
    folds,
    recommender_variations,
    [],
    repeats=REPEAT,
    relevance_threshold=RELEVANCE_THRESHOLD,
    number_of_top_recommendations=NUMBER_OF_TOP_RECOMMENDATIONS,
    benchmark_thread=grecond_biaknn_benchmark_thread,
    thread_count=8
)

# Persist the results as JSON; the next cell reads this file back.
with open('GreConD_IBPAKNN_sweep_dataset_binarization_threshold.results', 'w') as file_object:
    json.dump(results, file_object, indent=4)

In [None]:
import json
from evaluation.threads import GRECOND_BIAKNN_METIC_NAMES

# Read back the JSON results written by the dataset_binarization_threshold
# sweep cell.
with open('GreConD_IBPAKNN_sweep_dataset_binarization_threshold.results', 'r') as file_object:
    results = json.load(file_object)

# Print one result table per metric, preceded by the metric name and followed
# by a blank line.
for metric_name in GRECOND_BIAKNN_METIC_NAMES:
    print(metric_name)
    print(plot.get_result_table(metric_name, results))
    print()

##### user_binarization_threshold

In [None]:
import json
from evaluation import threads
from recommenders import grecond_recommender
from dataset.movie_lens import load_ml_1m_folds
from dataset.movie_lens import resolve_folds

data, k_fold = load_ml_1m_folds()
folds = resolve_folds(data, k_fold)

# Reference configuration. Every variation below reuses it unchanged except
# for `user_binarization_threshold`, which is the parameter being swept.
best_params_user = {
    'grecond_coverage': 0.1,
    'dataset_binarization_threshold': 3.0,
    'minimum_bicluster_sparsity': None,
    'minimum_bicluster_coverage': None,
    'minimum_bicluster_relative_size': None,
    'user_binarization_threshold': 3.0,
    'number_of_top_k_biclusters': None,
    'knn_k': 100,
    'knn_type': 'item'
}

user_binarization_thresholds = [1.0, 2.0, 3.0, 4.0, 5.0]

# One labelled recommender per swept value. The reference configuration is
# unpacked as keyword arguments with the swept parameter overridden, and the
# label is the string form of the swept value.
recommender_variations = [
    threads.RecommenderVariation(
        str(user_binarization_threshold),
        grecond_recommender.GreConDBiAKNNRecommender(
            **{
                **best_params_user,
                "user_binarization_threshold": user_binarization_threshold,
            }
        ),
    )
    for user_binarization_threshold in user_binarization_thresholds
]

from evaluation.threads import grecond_biaknn_benchmark_thread

results = plot.benchmark(
    folds,
    recommender_variations,
    [],
    repeats=REPEAT,
    relevance_threshold=RELEVANCE_THRESHOLD,
    number_of_top_recommendations=NUMBER_OF_TOP_RECOMMENDATIONS,
    benchmark_thread=grecond_biaknn_benchmark_thread,
    thread_count=8
)

# Persist the results as JSON; the next cell reads this file back.
with open('GreConD_IBPAKNN_sweep_user_binarization_threshold.results', 'w') as file_object:
    json.dump(results, file_object, indent=4)

In [None]:
import json
from evaluation.threads import GRECOND_BIAKNN_METIC_NAMES

# Read back the JSON results written by the user_binarization_threshold sweep
# cell.
with open('GreConD_IBPAKNN_sweep_user_binarization_threshold.results', 'r') as file_object:
    results = json.load(file_object)

# Print one result table per metric, preceded by the metric name and followed
# by a blank line.
for metric_name in GRECOND_BIAKNN_METIC_NAMES:
    print(metric_name)
    print(plot.get_result_table(metric_name, results))
    print()

##### knn_k

In [None]:
import json
from evaluation import threads
from recommenders import grecond_recommender
from dataset.movie_lens import load_ml_1m_folds
from dataset.movie_lens import resolve_folds

data, k_fold = load_ml_1m_folds()
folds = resolve_folds(data, k_fold)

# Reference configuration. Every variation below reuses it unchanged except
# for `knn_k`, which is the parameter being swept.
best_params_user = {
    'grecond_coverage': 0.1,
    'dataset_binarization_threshold': 3.0,
    'minimum_bicluster_sparsity': None,
    'minimum_bicluster_coverage': None,
    'minimum_bicluster_relative_size': None,
    'user_binarization_threshold': 3.0,
    'number_of_top_k_biclusters': None,
    'knn_k': 100,
    'knn_type': 'item'
}

knn_ks = [1, 20, 50, 100, 200, 500, 1000]

# One labelled recommender per swept value. The reference configuration is
# unpacked as keyword arguments with the swept parameter overridden, and the
# label is the string form of the swept value.
recommender_variations = [
    threads.RecommenderVariation(
        str(knn_k),
        grecond_recommender.GreConDBiAKNNRecommender(
            **{**best_params_user, "knn_k": knn_k}
        ),
    )
    for knn_k in knn_ks
]

from evaluation.threads import grecond_biaknn_benchmark_thread

results = plot.benchmark(
    folds,
    recommender_variations,
    [],
    repeats=REPEAT,
    relevance_threshold=RELEVANCE_THRESHOLD,
    number_of_top_recommendations=NUMBER_OF_TOP_RECOMMENDATIONS,
    benchmark_thread=grecond_biaknn_benchmark_thread,
    thread_count=8
)

# Persist the results as JSON; the next cell reads this file back.
with open('GreConD_IBPAKNN_sweep_knn_k.results', 'w') as file_object:
    json.dump(results, file_object, indent=4)

In [None]:
import json
from evaluation.threads import GRECOND_BIAKNN_METIC_NAMES

# Read back the JSON results written by the knn_k sweep cell.
with open('GreConD_IBPAKNN_sweep_knn_k.results', 'r') as file_object:
    results = json.load(file_object)

# Print one result table per metric, preceded by the metric name and followed
# by a blank line.
for metric_name in GRECOND_BIAKNN_METIC_NAMES:
    print(metric_name)
    print(plot.get_result_table(metric_name, results))
    print()

##### number_of_top_k_biclusters

In [None]:
import json
from evaluation import threads
from recommenders import grecond_recommender
from dataset.movie_lens import load_ml_1m_folds
from dataset.movie_lens import resolve_folds

data, k_fold = load_ml_1m_folds()
folds = resolve_folds(data, k_fold)

# Reference configuration. Every variation below reuses it unchanged except
# for `number_of_top_k_biclusters`, which is the parameter being swept.
best_params_user = {
    'grecond_coverage': 0.1,
    'dataset_binarization_threshold': 3.0,
    'minimum_bicluster_sparsity': None,
    'minimum_bicluster_coverage': None,
    'minimum_bicluster_relative_size': None,
    'user_binarization_threshold': 3.0,
    'number_of_top_k_biclusters': None,
    'knn_k': 100,
    'knn_type': 'item'
}

numbers_of_top_k_biclusters = [None, 1, 5, 10, 50, 100, 200]

# One labelled recommender per swept value. The reference configuration is
# unpacked as keyword arguments with the swept parameter overridden, and the
# label is the string form of the swept value ("None" for the None entry).
recommender_variations = [
    threads.RecommenderVariation(
        str(number_of_top_k_biclusters),
        grecond_recommender.GreConDBiAKNNRecommender(
            **{
                **best_params_user,
                "number_of_top_k_biclusters": number_of_top_k_biclusters,
            }
        ),
    )
    for number_of_top_k_biclusters in numbers_of_top_k_biclusters
]

from evaluation.threads import grecond_biaknn_benchmark_thread

results = plot.benchmark(
    folds,
    recommender_variations,
    [],
    repeats=REPEAT,
    relevance_threshold=RELEVANCE_THRESHOLD,
    number_of_top_recommendations=NUMBER_OF_TOP_RECOMMENDATIONS,
    benchmark_thread=grecond_biaknn_benchmark_thread,
    thread_count=8
)

# Persist the results as JSON; the next cell reads this file back.
with open('GreConD_IBPAKNN_sweep_number_of_top_k_biclusters.results', 'w') as file_object:
    json.dump(results, file_object, indent=4)

In [None]:
import json
from evaluation.threads import GRECOND_BIAKNN_METIC_NAMES

# Read back the JSON results written by the number_of_top_k_biclusters sweep
# cell.
with open('GreConD_IBPAKNN_sweep_number_of_top_k_biclusters.results', 'r') as file_object:
    results = json.load(file_object)

# Print one result table per metric, preceded by the metric name and followed
# by a blank line.
for metric_name in GRECOND_BIAKNN_METIC_NAMES:
    print(metric_name)
    print(plot.get_result_table(metric_name, results))
    print()