<img src="https://www3.um.edu.uy/logoum.jpg" width=300>
<h1 align="center">Thesis - KNN Item Item RMSE</h1> 
<h2 align="center">Alejo Paullier</h2> 

https://lkpy.lenskit.org/en/stable/knn.html

In [1]:
from lenskit import batch, topn, util
from lenskit import crossfold as xf
from lenskit.algorithms import Recommender, item_knn as knn
from lenskit.metrics.predict import rmse
import pandas as pd
import numpy as np

ratings = pd.read_csv('C:\\Users\\Alejo\\Tesis\\Demo\\ml-100k\\u.data', sep='\t',
                      names=['user', 'item', 'rating', 'timestamp'])


def eval(aname, algo, train, test):
    fittable = util.clone(algo) # Object cloning means to create an exact copy of the original object.
    fittable = Recommender.adapt(fittable) 
    fittable.fit(train) # train the algorithm with the training dataset
    # Now we run the recommender:
    preds = batch.predict(fittable, test)
    # add the algorithm name for analyzability
    return preds

In [9]:
def model_trainer(neighbours,center):
    neighbours=int(neighbours)
    all_recs = []
    test_data = []
    algo_ii = knn.ItemItem(neighbours,center=center,aggregate='weighted-average') # define algorithm
    
    for train, test in xf.partition_users(ratings[['user', 'item', 'rating']], 5, xf.SampleFrac(0.2)):
        test_data.append(test) # save testing data
        all_recs.append(eval('ItemItem', algo_ii, train, test))
    
    all_recs = pd.concat(all_recs, ignore_index=True)

    metric = rmse(all_recs['prediction'], all_recs['rating'])
    return metric

In [10]:
from hyperopt import fmin, tpe, hp, STATUS_OK

def objective(params):
    neighbours = params['neighbours']
    center = params['center']
    metric = model_trainer(neighbours,center)
    print([neighbours,center,metric],',')
    return {'loss': metric, 'status': STATUS_OK }

space= {'neighbours': hp.uniform('neighbours', 1, 100),
       'center': hp.choice('center', ['True', 'False'])}

best = fmin(objective, space, algo=tpe.suggest,max_evals=100)

[9.096400981943427, 'False', 0.9098975293917538]                                                                       
,                                                                                                                      
[29.29633808901985, 'True', 0.9092006436150536]                                                                        
,                                                                                                                      
[4.96396577059455, 'True', 0.9430802498271568]                                                                         
,                                                                                                                      
[30.857997324885293, 'False', 0.8952065930660951]                                                                      
,                                                                                                                      
[82.81860826721089, 'False', 0.916017567

[18.932548194331485, 'False', 0.9088911342938579]                                                                      
,                                                                                                                      
[47.772180195853515, 'False', 0.9058846011700588]                                                                      
,                                                                                                                      
[11.381944779867744, 'False', 0.9150762085240562]                                                                      
,                                                                                                                      
[36.4162934903565, 'False', 0.8994972047237615]                                                                        
,                                                                                                                      
[83.270305822881, 'False', 0.90981860459

In [12]:
metric = pd.DataFrame(array)
metric.columns = ['neighbours','center','RMSE']
metric

Unnamed: 0,neighbours,center,RMSE
0,9.096401,False,0.909898
1,29.296338,True,0.909201
2,4.963966,True,0.943080
3,30.857997,False,0.895207
4,82.818608,False,0.916018
...,...,...,...
95,29.671835,True,0.900278
96,78.155930,True,0.912938
97,91.176195,False,0.907714
98,33.298479,True,0.899251


In [13]:
metric.sort_values(by=['RMSE'],ascending=True).head(10)

Unnamed: 0,neighbours,center,RMSE
19,37.874783,True,0.893107
84,20.497183,False,0.893773
10,72.505549,True,0.895184
3,30.857997,False,0.895207
57,16.796112,True,0.895436
29,42.312312,True,0.897178
74,27.458344,False,0.897594
66,23.756736,False,0.89796
98,33.298479,True,0.899251
75,33.967941,True,0.899307


In [14]:
metric.to_csv('metric_RMSE.csv', index=False)