In [1]:
# Notebook setup: imports, autoreload, plot template and RNG seeding.
import random
import numpy as np
import evaluation.plot as plot

# Load IPython's autoreload extension so that edits to imported modules are
# picked up without restarting the kernel.
%load_ext autoreload

# Mode 2: reload all modules before the code of each cell is executed.
%autoreload 2

# Apply the project's plot template customisation (defined in evaluation.plot;
# presumably sets the default figure styling -- confirm there).
plot.customize_default_template()

# Seed both Python's `random` module and NumPy's global RNG with the same value
# so that code drawing from either of them is repeatable across runs.
# NOTE(review): RNGs living elsewhere (e.g. in worker processes) are not
# visibly seeded here -- confirm whether that matters for the results.
seed = 0
random.seed(seed)
np.random.seed(seed)

In [2]:
from dataset.common import resolve_folds
from dataset.movie_lens import load_ml_100k_folds

# Load the dataset together with its fold definition, then resolve the folds.
data, k_fold = load_ml_100k_folds()
folds = resolve_folds(data, k_fold)

# Unpack the first fold: its leading element is discarded, the second one is
# the (trainset, testset) pair.
_, (trainset, testset) = folds[0]

# Every fold is a two-element entry; keep only the second element of each.
folds_without_index = [payload for _fold_index, payload in folds]

Already downloaded!. Nothing to do.


In [3]:
from evaluation.strategies import (
    MAEStrategy,
    RMSEStrategy,
    MicroAveragedRecallStrategy,
    MacroAveragedRecallStrategy,
    RecallAtKStrategy,
    MicroAveragedPrecisionStrategy,
    MacroAveragedPrecisionStrategy,
    PrecisionAtKStrategy,
    NDCGStrategy,
    F1ScoreStrategy,
    PredictionCoverageStrategy,
)

# Settings shared by the measures below, named once so they cannot drift apart.
# NOTE(review): `threshold` presumably is the rating cut-off for relevance and
# `k` the length of the ranked list -- confirm in evaluation.strategies.
MEASURE_THRESHOLD = 4.0
MEASURE_K = 20

# No train-side measures are configured.
train_measures = []

# Measures evaluated on the test side; the order of this list is kept as-is.
test_measures = [
    # Rating-prediction error.
    MAEStrategy(verbose=False),
    RMSEStrategy(verbose=False),
    # Recall variants.
    MicroAveragedRecallStrategy(threshold=MEASURE_THRESHOLD),
    MacroAveragedRecallStrategy(threshold=MEASURE_THRESHOLD),
    RecallAtKStrategy(k=MEASURE_K, threshold=MEASURE_THRESHOLD),
    # Precision variants.
    MicroAveragedPrecisionStrategy(threshold=MEASURE_THRESHOLD),
    MacroAveragedPrecisionStrategy(threshold=MEASURE_THRESHOLD),
    PrecisionAtKStrategy(k=MEASURE_K, threshold=MEASURE_THRESHOLD),
    # Ranking quality, F1 and coverage.
    NDCGStrategy(k=MEASURE_K, threshold=MEASURE_THRESHOLD),
    F1ScoreStrategy(k=MEASURE_K, threshold=MEASURE_THRESHOLD),
    PredictionCoverageStrategy(),
]


In [5]:
from surprise.prediction_algorithms import SVDpp
from evaluation.benchmark import GridSearch

# Hyper-parameter grid for SVDpp: 4 * 3 * 3 * 3 = 108 combinations, which
# matches the 108 tasks reported in the recorded output of this cell.
parameters_grid = {
    "n_factors": [20, 50, 100, 200],
    "n_epochs": [10, 20, 40],
    "lr_all": [0.002, 0.005, 0.01],
    # NOTE(review): 0.3 is an order of magnitude above the other two values --
    # confirm it is intentional and not meant to be 0.03.
    "reg_all": [0.01, 0.02, 0.3],
}

# Configure the search over SVDpp with the grid above and the test/train
# measure lists defined earlier in the notebook.
# NOTE(review): max_workers=16 presumably caps the number of parallel workers;
# confirm against evaluation.benchmark.GridSearch and the machine's core count.
grid_search = GridSearch(
    SVDpp,
    parameters_grid,
    test_measures,
    train_measures,
    max_workers=16,
)

# Run the search on the folds stripped of their index. This is the expensive
# step of the notebook: the recorded run took a little over one hour.
# The three returned values are written to disk by the following cell.
best, ordering, raw = grid_search.fit(folds_without_index)

Completed 108/108 | Avg. time/task: 0m 35.8s | Time left: 0m 0.0s | Estimated completion time: 14:28:528
All tasks completed.
Total time: 1h 4m 30.6s

In [6]:
import json

# Persist the three grid-search results as indented JSON, one file per result.
# The pairs are written in the order listed here.
results_to_save = (
    ('SVD++_grid_best.results', best),
    ('SVD++_grid_ordering.results', ordering),
    ('SVD++_grid_raw.results', raw),
)

for results_path, results_payload in results_to_save:
    with open(results_path, 'w') as file_object:
        json.dump(results_payload, file_object, indent=4)


: 