In [2]:
# Setup notebook
import random
import numpy as np
import evaluation.plot as plot

# Load the autoreload extension so edited modules can be reloaded without
# restarting the kernel
%load_ext autoreload

# Set autoreload to reload all modules every time a cell is executed
%autoreload 2

# Apply the plot template customisation provided by evaluation.plot
plot.customize_default_template()

# Seed both Python's `random` module and NumPy's global generator with the
# same value so that code drawing from either one is repeatable across runs.
seed = 0
random.seed(seed)
np.random.seed(seed)

In [3]:
from dataset.common import resolve_folds
from dataset.movie_lens import load_ml_100k_folds

# Load the ML-100k data together with its fold definition, then hand both to
# resolve_folds to obtain the per-fold entries.
data, k_fold = load_ml_100k_folds()
folds = resolve_folds(data, k_fold)

# Keep only element 1 of every resolved fold. Element 0 is presumably the fold
# index (hence the name) — TODO confirm against dataset.common.resolve_folds.
folds_without_index = [indexed_fold[1] for indexed_fold in folds]

Already downloaded!. Nothing to do.


In [3]:
from evaluation.strategies import (
    MAEStrategy,
    RMSEStrategy,
    MicroAveragedRecallStrategy,
    MacroAveragedRecallStrategy,
    RecallAtKStrategy,
    MicroAveragedPrecisionStrategy,
    MacroAveragedPrecisionStrategy,
    PrecisionAtKStrategy,
    NDCGStrategy,
    F1ScoreStrategy,
    PredictionCoverageStrategy,
    BiclusterCountStrategy,
    MeanBiclusterSizeStrategy,
    MeanBiclusterIntentStrategy,
    MeanBiclusterExtentStrategy,
    ItemCoverage,
    UserCoverage,
)

# Settings shared by several test measures. They are defined once so the
# measures cannot silently drift apart when one of the values is tuned.
# `threshold=` value; presumably the minimum rating counted as relevant — TODO confirm.
MEASURE_THRESHOLD = 4.0
# `k=` value; presumably the cut-off length of the ranked list — TODO confirm.
MEASURE_K = 20

# Measures handed to GridSearch as its train-side measure list.
train_measures = [
    BiclusterCountStrategy(),
    MeanBiclusterSizeStrategy(),
    MeanBiclusterIntentStrategy(),
    MeanBiclusterExtentStrategy(),
    ItemCoverage(),
    UserCoverage(),
]

# Measures handed to GridSearch as its test-side measure list. Every
# threshold-based measure uses MEASURE_THRESHOLD and every "@k" measure uses
# MEASURE_K, exactly as the literal values 4.0 and 20 did before.
test_measures = [
    MAEStrategy(verbose=False),
    RMSEStrategy(verbose=False),
    MicroAveragedRecallStrategy(threshold=MEASURE_THRESHOLD),
    MacroAveragedRecallStrategy(threshold=MEASURE_THRESHOLD),
    RecallAtKStrategy(k=MEASURE_K, threshold=MEASURE_THRESHOLD),
    MicroAveragedPrecisionStrategy(threshold=MEASURE_THRESHOLD),
    MacroAveragedPrecisionStrategy(threshold=MEASURE_THRESHOLD),
    PrecisionAtKStrategy(k=MEASURE_K, threshold=MEASURE_THRESHOLD),
    NDCGStrategy(k=MEASURE_K, threshold=MEASURE_THRESHOLD),
    F1ScoreStrategy(k=MEASURE_K, threshold=MEASURE_THRESHOLD),
    PredictionCoverageStrategy(),
]

In [1]:
from pattern_mining.strategies import QUBIC2Strategy
from recommenders.BBCF import BBCF
from recommenders.common import weight_frequency
from evaluation.benchmark import GridSearch, expand_grid


import warnings

from numba.core.errors import NumbaWarning

# NOTE: this cell reads test_measures, train_measures and folds_without_index,
# which it does not define itself. Running it before they exist (for example
# first on a fresh kernel) raises NameError.

# Candidate values for each QUBIC2Strategy keyword argument. Every item
# produced by expand_grid is unpacked as keyword arguments into one strategy.
qubic_parameters_grid = {
    "bicluster_number": [100000],
    "max_overlap": [0.35, 0.75, 1.0],
    "consistency": [0.75, 1.0],
    "minimum_column_width": [5, 20, 50, 100],
}
qubic_strategies = [
    QUBIC2Strategy(**strategy_kwargs)
    for strategy_kwargs in expand_grid(qubic_parameters_grid)
]

# Search space given to GridSearch for BBCF: the QUBIC2 strategies built above
# are the candidates for "mining_strategy", next to the remaining options.
parameters_grid = {
    "mining_strategy": qubic_strategies,
    "knn_type": ["item"],
    "number_of_top_k_biclusters": [10, 50, 100, 200],
    "bicluster_similarity_strategy": [weight_frequency],
    "knn_k": [20, 50, 100, 200],
}

grid_search = GridSearch(BBCF, parameters_grid, test_measures, train_measures, max_workers=16)

# NumbaWarning is ignored only inside this block; catch_warnings restores the
# previous warning filters when the block exits.
with warnings.catch_warnings():
    warnings.simplefilter('ignore', category=NumbaWarning)
    best, ordering, raw = grid_search.fit(folds_without_index)

NameError: name 'test_measures' is not defined

In [None]:
# Print `raw`, the third value returned by grid_search.fit, for inspection.
print(raw)

In [None]:
import json
from evaluation.benchmark import FallbackEncoder

# Each grid-search result object is written to its own file, in this order:
# best, ordering, raw.
results_to_write = (
    ('IB_BBCF_VANILLA_grid_best.results', best),
    ('IB_BBCF_VANILLA_grid_ordering.results', ordering),
    ('IB_BBCF_VANILLA_grid_raw.results', raw),
)

for results_path, results in results_to_write:
    # FallbackEncoder is supplied as the JSON encoder class for every dump.
    with open(results_path, 'w') as file_object:
        json.dump(results, file_object, indent=4, cls=FallbackEncoder)