In [1]:
# Setup notebook: imports, live module reloading, plot styling and RNG seeding.
import random
import numpy as np
import evaluation.plot as plot

# Load the autoreload extension
%load_ext autoreload

# Set autoreload to reload all modules every time a cell is executed,
# so edits to imported .py modules are picked up without restarting the kernel.
%autoreload 2

# Call the function to customize the default template.
# NOTE(review): project helper from evaluation.plot; presumably sets the default
# plotting template used by later figures - confirm in that module.
plot.customize_default_template()

# Seed both Python's `random` module and NumPy's legacy global RNG with the same
# value so code drawing from either generator is repeatable across runs.
seed = 0
random.seed(seed)
np.random.seed(seed)

In [2]:
from dataset.common import resolve_folds
from dataset.movie_lens import load_ml_100k_folds

# Load the dataset together with its fold splitter, then resolve the folds.
# The first entry of `folds` is a 2-item sequence whose second item is a
# (trainset, testset) pair.
data, k_fold = load_ml_100k_folds()
folds = resolve_folds(data, k_fold)

# Keep only the second item of every fold entry, dropping the leading one.
folds_without_index = [fold[1] for fold in folds]

# Work on the first fold only. Unpack from the index-free list instead of
# binding the unused leading item to `_`: in IPython `_` holds the last cell
# output, and a user-assigned `_` stops being updated by the shell.
trainset, testset = folds_without_index[0]

Already downloaded!. Nothing to do.


In [3]:
from evaluation.strategies import (
    MAEStrategy,
    RMSEStrategy,
    MicroAveragedRecallStrategy,
    MacroAveragedRecallStrategy,
    RecallAtKStrategy,
    MicroAveragedPrecisionStrategy,
    MacroAveragedPrecisionStrategy,
    PrecisionAtKStrategy,
    NDCGStrategy,
    F1ScoreStrategy,
    PredictionCoverageStrategy,
    BiclusterCountStrategy,
    MeanBiclusterSizeStrategy,
    MeanBiclusterIntentStrategy,
    MeanBiclusterExtentStrategy,
    ItemCoverage,
    UserCoverage,
)

# Shared evaluation settings, defined once so every measure below stays in sync.
# NOTE(review): presumably the minimum rating counted as relevant - confirm
# against evaluation.strategies.
RATING_THRESHOLD = 4.0
# Cut-off `k` shared by the top-k measures (recall@k, precision@k, nDCG, F1).
TOP_K = 20

# Bicluster statistics and item/user coverage measures.
train_measures = [
    BiclusterCountStrategy(),
    MeanBiclusterSizeStrategy(),
    MeanBiclusterIntentStrategy(),
    MeanBiclusterExtentStrategy(),
    ItemCoverage(),
    UserCoverage(),
]

# Rating-error, precision/recall, ranking and prediction-coverage measures.
test_measures = [
    MAEStrategy(verbose=False),
    RMSEStrategy(verbose=False),
    MicroAveragedRecallStrategy(threshold=RATING_THRESHOLD),
    MacroAveragedRecallStrategy(threshold=RATING_THRESHOLD),
    RecallAtKStrategy(k=TOP_K, threshold=RATING_THRESHOLD),
    MicroAveragedPrecisionStrategy(threshold=RATING_THRESHOLD),
    MacroAveragedPrecisionStrategy(threshold=RATING_THRESHOLD),
    PrecisionAtKStrategy(k=TOP_K, threshold=RATING_THRESHOLD),
    NDCGStrategy(k=TOP_K, threshold=RATING_THRESHOLD),
    F1ScoreStrategy(k=TOP_K, threshold=RATING_THRESHOLD),
    PredictionCoverageStrategy(),
]

In [4]:
from evaluation.benchmark import fit_and_score

from pattern_mining.strategies import GreConDStrategy, QUBIC2Strategy

from recommenders.BBCF import BBCF
from recommenders.PAkNN import PAkNN
from recommenders.common import weight_frequency, double_weight_frequency, user_pattern_similarity

from pattern_mining.strategies import BinaPsStrategy
from recommenders.BBCF import BBCF
from recommenders.common import weight_frequency
from evaluation.benchmark import GridSearch, generate_parameter_combinations

# Alternative configuration, currently disabled and kept for reference:
# BBCF with biclusters mined by BinaPs.
# recommender = BBCF(
#     mining_strategy=BinaPsStrategy(
#         dataset_binarization_threshold=4.0,
#         hidden_dimension_neurons_number=None,
#         epochs=10000,
#         weights_binarization_threshold=0.1,
#     ),
#     knn_type="item",
#     number_of_top_k_biclusters=100,
#     bicluster_similarity_strategy=weight_frequency,
#     knn_k=50,
# )

# Active configuration: BBCF with knn_type="item" on biclusters mined by QUBIC2.
qubic2_mining = QUBIC2Strategy(bicluster_number=1000, minimum_column_width=30)
recommender = BBCF(
    mining_strategy=qubic2_mining,
    knn_type="item",
    number_of_top_k_biclusters=200,
    bicluster_similarity_strategy=weight_frequency,
    knn_k=100,
)

# Fit and score on the selected train/test split with both measure lists.
# The single outcome is wrapped in a list and exposed as `results`.
first_fold_outcome = fit_and_score(
    recommender_system=recommender,
    trainset=trainset,
    testset=testset,
    train_measures=train_measures,
    test_measures=test_measures,
)
results = [first_fold_outcome]


Cannot cache compiled function "get_top_k_biclusters_for_user" as it uses dynamic globals (such as ctypes pointers and large global arrays)



Completed 20000/20000 | Avg. time/task: 0m 0.1s | Time left: 0m 0.0sss
All tasks completed.
Total time: 0h 36m 5.4s


In [6]:
def format_measure(name, value):
    """Return one report line: the measure name padded to 30 columns, then its value.

    The value is rendered with two decimals when it supports the ``.2f`` format
    and falls back to its plain string form otherwise.

    Args:
        name: Measure name (dictionary key).
        value: Measure value; numeric or anything else printable.

    Returns:
        The formatted line as a string.
    """
    try:
        return f"{name:30} {value:.2f}"
    except (TypeError, ValueError):
        # TypeError: objects without float formatting (None, lists, ...).
        # ValueError: str values, which reject the 'f' format code.
        return f"{name:30} {value}"


# results[0][0] and results[0][1] are the two measure dictionaries of the first
# (and only) scored fold; print them one after the other with the same layout.
for measures in (results[0][0], results[0][1]):
    for key, value in measures.items():
        print(format_measure(key, value))

mae                            0.75
rmse                           0.98
micro_averaged_recall          0.41
macro_averaged_recall          0.36
recall_at_k                    0.27
micro_averaged_precision       0.81
macro_averaged_precision       0.75
precision_at_k                 0.75
nDCG_at_k                      0.40
f1_score                       0.40
prediction_coverage            0.44
bicluster_count                260.00
mean_bicluster_size            92.04
mean_bicluster_intent          45.34
mean_bicluster_extent          2.04
item_coverage                  0.52
user_coverage                  0.29
