<img src="https://www3.um.edu.uy/logoum.jpg" width=300>
<h1 align="center">Thesis - KNN Item Item hyperparameter optimization</h1> 
<h2 align="center">Alejo Paullier</h2> 

* https://lkpy.lenskit.org/en/stable/knn.html
* https://parameter-sherpa.readthedocs.io/en/latest/algorithms/algorithms.html

In [2]:
# lOAD LIBRARIES

from lenskit import batch, topn, util
from lenskit import crossfold as xf
from lenskit.algorithms import Recommender, als, item_knn as knn
from lenskit import topn
import pandas as pd
import numpy as np
import sherpa

# LOAD DATA 

ratings = pd.read_csv('C:\\Users\\Alejo\\Tesis\\Demo\\ml-100k\\u.data', sep='\t',
                      names=['user', 'item', 'rating', 'timestamp'])

In [16]:
def eval(aname, algo, train, test):
    fittable = util.clone(algo) # Object cloning means to create an exact copy of the original object.
    fittable = Recommender.adapt(fittable) 
    fittable.fit(train) # Entrenamos el algoritmo con el training dataset
    users = test.user.unique() # Devuelve un array con los users unicos en el testing dataset
    # now we run the recommender
    recs = batch.recommend(fittable, users, 100)
    # add the algorithm name for analyzability
    recs['Algorithm'] = aname
    return recs

In [17]:
def model_trainer(neighbours):
    all_recs = []
    test_data = []
    algo_ii = knn.ItemItem(neighbours,aggregate='weighted-average')
    for train, test in xf.partition_users(ratings[['user', 'item', 'rating']], 1, xf.SampleFrac(0.2)):
        test_data.append(test)
        all_recs.append(eval('ItemItem', algo_ii, train, test))
    all_recs = pd.concat(all_recs, ignore_index=True)
    all_recs.head()
    test_data = pd.concat(test_data, ignore_index=True)
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(all_recs, test_data)
    results.head()
    return -results.groupby('Algorithm').ndcg.mean()


# Parameters

In [3]:
parameters = [sherpa.Discrete(name='neighbours', range=[1,10], scale='linear')]

# Algorithms

In [4]:
algorithm = sherpa.algorithms.RandomSearch(max_num_trials=8)

# Study

In [5]:
study = sherpa.Study(parameters=parameters,
                 algorithm=algorithm,
                 lower_is_better=False)

OSError: Dashboard not supported on Windows. Disable the dashboard and save the finalized study instead.

# Trial

In [None]:
trial = study.get_suggestion()

In [None]:
num_iterations = 8
for trial in study:
    model = model_trainer(train.parameters)
    for iteration in range(num_iterations):
        training_error = model.fit(epochs=1)
        validation_error = model.evaluate()
        study.add_observation(trial=trial,
                              iteration=iteration,
                              objective=validation_error,
                              context={'training_error': training_error})
    study.finalize(trial)

In [None]:
from hyperopt import fmin, tpe, hp, STATUS_OK

def objective(x):
    return {'loss': model_trainer(x), 'status': STATUS_OK }
average=[]
for y in range(1,6):
    best = fmin(objective,
        space=hp.quniform('x', 1, 30),
        algo=tpe.suggest,
        max_evals=20
               )
    average.append(best)
return average

# Output

In [None]:
[{'x': 24.662888246908466},
 {'x': 16.765983379302043},
 {'x': 18.317671770214982},
 {'x': 24.685607251830028},
 {'x': 19.806044815779607},
 {'x': 29.48438448238169},
 {'x': 8.0991877556541},
 {'x': 11.199447121503693},
 {'x': 24.582880111854013},
 {'x': 8.399833875607502}]

100%|██████████████████████████████████████████████████| 20/20 [03:55<00:00, 10.93s/it, best loss: -0.0976920342362734]
100%|█████████████████████████████████████████████████| 20/20 [03:42<00:00, 11.11s/it, best loss: -0.10159618987493899]
100%|█████████████████████████████████████████████████| 20/20 [03:37<00:00, 11.05s/it, best loss: -0.09763179535129299]
100%|█████████████████████████████████████████████████| 20/20 [03:45<00:00, 11.16s/it, best loss: -0.10022280319405072]
100%|█████████████████████████████████████████████████| 20/20 [03:40<00:00, 11.07s/it, best loss: -0.10125246825974063]
100%|█████████████████████████████████████████████████| 20/20 [07:19<00:00, 11.87s/it, best loss: -0.09793780178603041]
100%|█████████████████████████████████████████████████| 20/20 [03:43<00:00, 11.13s/it, best loss: -0.10112992547143698]
100%|█████████████████████████████████████████████████| 20/20 [03:36<00:00, 10.98s/it, best loss: -0.09963492665250731]
100%|█████████████████████████████████████████████████| 20/20 [03:41<00:00, 10.88s/it, best loss: -0.09796062967655288]
100%|█████████████████████████████████████████████████| 20/20 [03:46<00:00, 10.63s/it, best loss: -0.10072238410400004]