<img src="https://www3.um.edu.uy/logoum.jpg" width=300>
<h1 align="center">Thesis - KNN User User MAE</h1> 
<h2 align="center">Alejo Paullier</h2> 

https://lkpy.lenskit.org/en/stable/knn.html

In [1]:
from lenskit import batch, topn, util
from lenskit import crossfold as xf
from lenskit.algorithms import Recommender, user_knn as knn
from lenskit.metrics.predict import mae
import pandas as pd
import numpy as np

ratings = pd.read_csv('C:\\Users\\Alejo\\Tesis\\Demo\\ml-100k\\u.data', sep='\t',
                      names=['user', 'item', 'rating', 'timestamp'])


def eval(aname, algo, train, test):
    fittable = util.clone(algo) # Object cloning means to create an exact copy of the original object.
    fittable = Recommender.adapt(fittable) 
    fittable.fit(train) # train the algorithm with the training dataset
    # Now we run the recommender:
    preds = batch.predict(fittable, test)
    # add the algorithm name for analyzability
    return preds

In [3]:
def model_trainer(neighbours,center):
    neighbours=int(neighbours)
    all_recs = []
    test_data = []
    algo_uu = knn.UserUser(neighbours,center=center,aggregate='weighted-average') # define algorithm
    
    for train, test in xf.partition_users(ratings[['user', 'item', 'rating']], 5, xf.SampleFrac(0.2)):
        test_data.append(test) # save testing data
        all_recs.append(eval('UserUser', algo_uu, train, test))
    
    all_recs = pd.concat(all_recs, ignore_index=True)

    metric = mae(all_recs['prediction'], all_recs['rating'])
    return metric

In [4]:
from hyperopt import fmin, tpe, hp, STATUS_OK

def objective(params):
    neighbours = params['neighbours']
    center = params['center']
    metric = model_trainer(neighbours,center)
    print([neighbours,center,metric],',')
    return {'loss': metric, 'status': STATUS_OK }

space= {'neighbours': hp.uniform('neighbours', 1, 100),
       'center': hp.choice('center', ['True', 'False'])}

best = fmin(objective, space, algo=tpe.suggest,max_evals=100)

[47.31260921987756, 'True', 0.7285144242098557]                                                                        
,                                                                                                                      
[84.1782498037066, 'True', 0.7322486672742822]                                                                         
,                                                                                                                      
[51.44370756765709, 'False', 0.7288068843750228]                                                                       
,                                                                                                                      
[75.0189248141096, 'False', 0.7277620453138514]                                                                        
,                                                                                                                      
[89.88040815692473, 'False', 0.728438087

[43.89220834981673, 'False', 0.7231038370700391]                                                                       
,                                                                                                                      
[3.3248285243324247, 'True', 0.7917581869734469]                                                                       
,                                                                                                                      
[58.89779424084626, 'False', 0.7258386123265956]                                                                       
,                                                                                                                      
[50.00346786070149, 'True', 0.7300904088749623]                                                                        
,                                                                                                                      
[24.8272191918101, 'True', 0.72659647649

[27.731952928330102, 'True', 0.7338136514696135]                                                                       
,                                                                                                                      
[34.63554768592256, 'True', 0.725303036942121]                                                                         
,                                                                                                                      
[44.430759741002646, 'True', 0.7238350008364173]                                                                       
,                                                                                                                      
[48.30781436315391, 'True', 0.7217545298497066]                                                                        
,                                                                                                                      
[49.76913136130357, 'True', 0.7223567697

In [6]:
metric = pd.DataFrame(array)
metric.columns = ['neighbours','center','MAE']
metric

Unnamed: 0,neighbours,center,MAE
0,47.312609,True,0.728514
1,84.178250,True,0.732249
2,51.443708,False,0.728807
3,75.018925,False,0.727762
4,89.880408,False,0.728438
...,...,...,...
95,32.891852,True,0.724121
96,68.631415,False,0.727458
97,30.170427,True,0.725282
98,26.858583,False,0.725106


In [7]:
metric.sort_values(by=['MAE'],ascending=True).head(10)

Unnamed: 0,neighbours,center,MAE
6,36.062464,True,0.718199
51,59.700109,False,0.719527
71,48.307814,True,0.721755
33,45.42219,True,0.721909
75,38.401567,False,0.722233
72,49.769131,True,0.722357
28,33.027666,True,0.722741
55,87.619379,False,0.722971
34,43.892208,False,0.723104
70,44.43076,True,0.723835


In [8]:
metric.to_csv('metric_MAE.csv', index=False)