<img src="https://www3.um.edu.uy/logoum.jpg" width=300>
    <h1 align="center">Thesis - Alternating Least Squares hyperparameter optimization</h1> 
<h2 align="center">Alejo Paullier</h2> 

# Import libraries

In [20]:
import os
import time

import pandas as pd
import numpy as np
import scipy
import sklearn
import flask
import GPyOpt
import sherpa
import pylab as pl

import sherpa.algorithms.bayesian_optimization as bayesian_optimization
from lenskit import batch, topn, util
from lenskit import crossfold as xf
from lenskit.algorithms import Recommender, item_knn as knn
from lenskit import topn

# Import data & define evaluation function

In [21]:
# Location of the ratings file. The previously hard-coded path is kept as the
# default so the original setup still works unchanged; on any other machine,
# set the ML100K_RATINGS_PATH environment variable before starting the kernel.
RATINGS_PATH = os.environ.get('ML100K_RATINGS_PATH',
                              'C:\\Users\\Alejo\\Tesis\\Demo\\ml-100k\\u.data')

# The file is read as tab-separated; column names are supplied explicitly,
# so pandas treats the first line of the file as data rather than a header.
ratings = pd.read_csv(RATINGS_PATH, sep='\t',
                      names=['user', 'item', 'rating', 'timestamp'])


def eval(aname, algo, train, test, n_recs=100):
    """Fit a copy of ``algo`` on ``train`` and recommend for every user in ``test``.

    NOTE: this name shadows Python's built-in ``eval`` inside the notebook. It is
    kept because other cells call the function by this name.

    Parameters
    ----------
    aname : str
        Label written to the ``Algorithm`` column of the returned recommendations.
    algo :
        Algorithm object. It is cloned first and ``fit`` is called on the
        adapted clone, not on ``algo`` itself.
    train :
        Training data handed to ``fit``.
    test :
        Test data; only the distinct values of its ``user`` column are used.
    n_recs : int, optional
        Number of recommendations requested per user. Defaults to 100, the
        value that was previously hard-coded.

    Returns
    -------
    The result of ``batch.recommend`` with an extra ``Algorithm`` column set
    to ``aname``.
    """
    # Clone, then wrap with Recommender.adapt so the object can be passed to
    # batch.recommend below.
    fittable = Recommender.adapt(util.clone(algo))
    fittable.fit(train)

    # Recommend once per distinct user present in the test data.
    users = test.user.unique()
    recs = batch.recommend(fittable, users, n_recs)

    # Tag the rows with the algorithm name so results can be grouped later.
    recs['Algorithm'] = aname
    return recs

# Define model

In [27]:
def model_trainer(neighbours):
    """Train an item-item k-NN recommender and return its mean nDCG.

    The module-level ``ratings`` frame (columns ``user``, ``item``, ``rating``)
    is split per user into a single train/test partition, holding out a 0.2
    fraction of each user's rows. Recommendations are generated with ``eval``
    and scored with ``topn.ndcg``.

    NOTE(review): no seed is passed to the splitter. The log output of repeated
    calls shows different item counts, so each call appears to be scored on a
    different split. Scores from different calls are therefore not strictly
    comparable; confirm whether a fixed seed is wanted.

    Parameters
    ----------
    neighbours :
        Passed as the first argument of ``knn.ItemItem``.

    Returns
    -------
    float
        Mean nDCG over the scored recommendation lists for this algorithm.
    """
    algo_name = 'ItemItem'
    algo_ii = knn.ItemItem(neighbours, aggregate='weighted-average')
    interactions = ratings[['user', 'item', 'rating']]

    all_recs = []
    test_data = []
    # One partition only; the loop form is kept so the partition count can be raised.
    for train, test in xf.partition_users(interactions, 1, xf.SampleFrac(0.2)):
        test_data.append(test)
        all_recs.append(eval(algo_name, algo_ii, train, test))
    all_recs = pd.concat(all_recs, ignore_index=True)
    test_data = pd.concat(test_data, ignore_index=True)

    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(all_recs, test_data)

    # Select the scalar for this algorithm explicitly: calling float() directly
    # on a one-element Series is deprecated in recent pandas releases.
    mean_ndcg = results.groupby('Algorithm').ndcg.mean()
    return float(mean_ndcg.loc[algo_name])

In [28]:
# Sanity check: score a single configuration before starting the search.
model_trainer(neighbours=5)

INFO:lenskit.crossfold:partitioning 100000 rows for 943 users into 1 partitions
INFO:lenskit.algorithms.item_knn:[ 14ms] made sparse matrix for 1659 items (80000 ratings from 943 users)
INFO:lenskit.algorithms.item_knn:[ 25ms] computed means for 1659 items
INFO:lenskit.algorithms.item_knn:[ 40ms] normalized rating matrix columns
INFO:lenskit.algorithms.item_knn:[ 42ms] computing similarity matrix
INFO:lenskit.algorithms.item_knn:[ 45ms] multiplying matrix with MKL
INFO:lenskit.algorithms.item_knn:[ 99ms] truncating 779903 neighbors to 1016524 (of 1016524 possible)
INFO:lenskit.algorithms.item_knn:[ 297ms] sorting neighborhoods
INFO:lenskit.algorithms.item_knn:[ 370ms] got neighborhoods for 1477 of 1659 items
INFO:lenskit.algorithms.item_knn:[ 373ms] computed 1016524 neighbor pairs
INFO:lenskit.batch._recommend:parallel backend SequentialBackend, effective njobs 1
INFO:lenskit.batch._recommend:recommending for 943 users (n_jobs=None)
INFO:lenskit.batch._recommend:recommended for 943 use

0.08999937258936189

# Define parameters

In [23]:
# Search space: a single discrete hyperparameter named 'neighbours' with range [1, 30].
parameters = [
    sherpa.Discrete('neighbours', [1, 30]),
]

# Optimiser: sherpa's GPyOpt wrapper with a GP model, capped at 5 trials.
alg = bayesian_optimization.GPyOpt(
    model_type='GP',
    num_initial_data_points='infer',
    max_num_trials=5,
)

In [24]:
# The objective reported to the study is to be maximised (lower_is_better=False);
# the dashboard is switched off.
study = sherpa.Study(
    parameters=parameters,
    algorithm=alg,
    lower_is_better=False,
    disable_dashboard=True,
)

In [37]:
# Run the search: each suggested value of 'neighbours' is scored once.
for trial in study:
    print("Trial {}:\t{}".format(trial.id, trial.parameters))
    ndcg_score = model_trainer(trial.parameters['neighbours'])

    # model_trainer yields one score per trial, so it is reported as a single
    # observation. The earlier version reported the same value five times with
    # a short sleep in between, which added no information.
    study.add_observation(trial=trial,
                          iteration=1,
                          objective=ndcg_score)

    # Close the trial. This call was previously commented out, and the recorded
    # output shows the same suggestion (16) for consecutive trials.
    study.finalize(trial=trial)

Trial 3:	{'neighbours': 16}


INFO:lenskit.crossfold:partitioning 100000 rows for 943 users into 1 partitions
INFO:lenskit.algorithms.item_knn:[ 8ms] made sparse matrix for 1643 items (80000 ratings from 943 users)
INFO:lenskit.algorithms.item_knn:[ 14ms] computed means for 1643 items
INFO:lenskit.algorithms.item_knn:[ 94ms] normalized rating matrix columns
INFO:lenskit.algorithms.item_knn:[ 97ms] computing similarity matrix
INFO:lenskit.algorithms.item_knn:[ 98ms] multiplying matrix with MKL
INFO:lenskit.algorithms.item_knn:[ 155ms] truncating 780501 neighbors to 1016896 (of 1016896 possible)
INFO:lenskit.algorithms.item_knn:[ 262ms] sorting neighborhoods
INFO:lenskit.algorithms.item_knn:[ 374ms] got neighborhoods for 1475 of 1643 items
INFO:lenskit.algorithms.item_knn:[ 376ms] computed 1016896 neighbor pairs
INFO:lenskit.batch._recommend:parallel backend SequentialBackend, effective njobs 1
INFO:lenskit.batch._recommend:recommending for 943 users (n_jobs=None)
INFO:lenskit.batch._recommend:recommended for 943 use

Trial 4:	{'neighbours': 16}


INFO:lenskit.crossfold:partitioning 100000 rows for 943 users into 1 partitions
INFO:lenskit.algorithms.item_knn:[ 5ms] made sparse matrix for 1646 items (80000 ratings from 943 users)
INFO:lenskit.algorithms.item_knn:[ 11ms] computed means for 1646 items
INFO:lenskit.algorithms.item_knn:[ 17ms] normalized rating matrix columns
INFO:lenskit.algorithms.item_knn:[ 18ms] computing similarity matrix
INFO:lenskit.algorithms.item_knn:[ 18ms] multiplying matrix with MKL
INFO:lenskit.algorithms.item_knn:[ 36ms] truncating 776733 neighbors to 1010482 (of 1010482 possible)
INFO:lenskit.algorithms.item_knn:[ 117ms] sorting neighborhoods
INFO:lenskit.algorithms.item_knn:[ 147ms] got neighborhoods for 1470 of 1646 items
INFO:lenskit.algorithms.item_knn:[ 148ms] computed 1010482 neighbor pairs
INFO:lenskit.batch._recommend:parallel backend SequentialBackend, effective njobs 1
INFO:lenskit.batch._recommend:recommending for 943 users (n_jobs=None)
INFO:lenskit.batch._recommend:recommended for 943 user

Trial 5:	{'neighbours': 20}


INFO:lenskit.crossfold:partitioning 100000 rows for 943 users into 1 partitions
INFO:lenskit.algorithms.item_knn:[ 6ms] made sparse matrix for 1651 items (80000 ratings from 943 users)
INFO:lenskit.algorithms.item_knn:[ 11ms] computed means for 1651 items
INFO:lenskit.algorithms.item_knn:[ 17ms] normalized rating matrix columns
INFO:lenskit.algorithms.item_knn:[ 18ms] computing similarity matrix
INFO:lenskit.algorithms.item_knn:[ 19ms] multiplying matrix with MKL
INFO:lenskit.algorithms.item_knn:[ 36ms] truncating 778826 neighbors to 1015032 (of 1015032 possible)
INFO:lenskit.algorithms.item_knn:[ 123ms] sorting neighborhoods
INFO:lenskit.algorithms.item_knn:[ 150ms] got neighborhoods for 1474 of 1651 items
INFO:lenskit.algorithms.item_knn:[ 151ms] computed 1015032 neighbor pairs
INFO:lenskit.batch._recommend:parallel backend SequentialBackend, effective njobs 1
INFO:lenskit.batch._recommend:recommending for 943 users (n_jobs=None)
INFO:lenskit.batch._recommend:recommended for 943 user

In [None]:
# Print the best result the study reports for the trials recorded so far.
print(study.get_best_result())

In [None]:
# Same search loop as above, with each trial finalized after it is scored.
for trial in study:
    print("Trial {}:\t{}".format(trial.id, trial.parameters))
    model = model_trainer(trial.parameters['neighbours'])
    # One score per trial, reported as iteration 1. The earlier version used
    # `i+1`, but `i` is never assigned in this cell, so it only ran when a
    # previous cell had left `i` behind in the kernel.
    study.add_observation(trial=trial,
                          iteration=1,
                          objective=model)
    study.finalize(trial=trial)

In [None]:
# Ask the study for one more suggestion directly, outside of a `for trial in study` loop.
trial = study.get_suggestion()

In [None]:
# Show the study's best result as the cell's output value.
study.get_best_result()