<img src="https://www3.um.edu.uy/logoum.jpg" width=300>
<h1 align="center">Thesis - KNN Item Item F1 </h1> 
<h2 align="center">Alejo Paullier</h2> 

https://lkpy.lenskit.org/en/stable/knn.html

In [1]:
from lenskit import batch, topn, util
from lenskit import crossfold as xf
from lenskit.algorithms import Recommender, item_knn as knn
from lenskit import topn
import pandas as pd
import numpy as np
np.seterr(divide='ignore', invalid='ignore')

ratings = pd.read_csv('C:\\Users\\Alejo\\Tesis\\Demo\\ml-100k\\u.data', sep='\t',
                      names=['user', 'item', 'rating', 'timestamp'])


def eval(aname, algo, train, test):
    fittable = util.clone(algo) # Object cloning means to create an exact copy of the original object.
    fittable = Recommender.adapt(fittable) 
    fittable.fit(train) # Entrenamos el algoritmo con el training dataset
    users = test.user.unique() # Devuelve un array con los users unicos en el testing dataset
    # now we run the recommender
    recs = batch.recommend(fittable, users, 100)
    # add the algorithm name for analyzability
    recs['Algorithm'] = aname
    return recs

In [2]:
def model_trainer(neighbours,center):
    neighbours=int(neighbours)
    all_recs = []
    test_data = []
    algo_ii = knn.ItemItem(neighbours,center=center,aggregate='weighted-average')
    for train, test in xf.partition_users(ratings[['user', 'item', 'rating']], 5, xf.SampleFrac(0.2)):
        test_data.append(test)
        all_recs.append(eval('ItemItem', algo_ii, train, test))
    all_recs = pd.concat(all_recs, ignore_index=True)
    test_data = pd.concat(test_data, ignore_index=True)
    rla1 = topn.RecListAnalysis()
    rla2 = topn.RecListAnalysis()
    rla1.add_metric(topn.precision)
    rla2.add_metric(topn.recall)
    prec = rla1.compute(all_recs, test_data)
    rec = rla2.compute(all_recs, test_data)
    F1 = ((prec['precision'].values*rec['recall'].values*2)/(prec['precision'].values+rec['recall'].values))
    F1 = np.nan_to_num(F1)
    return [F1.mean(),prec['precision'].mean(),rec['recall'].mean()]

In [3]:
from hyperopt import fmin, tpe, hp, STATUS_OK

def objective(params):
    neighbours = params['neighbours']
    center = params['center']
    metric = model_trainer(neighbours,center)
    print([neighbours,center,metric[0],metric[1],metric[2]],',')
    return {'loss': -metric[0], 'status': STATUS_OK }

space= {'neighbours': hp.uniform('neighbours', 1, 100),
       'center': hp.choice('center', ['True', 'False'])}

best = fmin(objective, space, algo=tpe.suggest,max_evals=100)

  0%|                                                                            | 0/100 [00:00<?, ?it/s, best loss: ?]




[4.73073998265985, 'False', 0.05470324527213706, 0.03732767762460245, 0.16421641057829553]                             
,                                                                                                                      
[12.259833044937453, 'True', 0.0590401500219406, 0.04032873806998943, 0.17351977323861756]                             
,                                                                                                                      
[12.568000116848962, 'True', 0.05898531849340851, 0.040360551431601284, 0.1719618920584041]                            
,                                                                                                                      
[52.15190936745623, 'True', 0.057511995331144976, 0.039374337221633186, 0.1664491332026238]                            
,                                                                                                                      
[22.475242048357448, 'False', 0.05935510

[8.173221418623681, 'True', 0.0584253277668968, 0.03987274655355263, 0.17172551798283084]                              
,                                                                                                                      
[32.76255026065006, 'True', 0.0604009442791481, 0.0411983032873807, 0.17594406083075556]                               
,                                                                                                                      
[17.261363732015084, 'True', 0.05978473784174907, 0.04088016967126198, 0.1730325310960779]                             
,                                                                                                                      
[32.328505658122175, 'True', 0.05940026598258393, 0.04074231177094389, 0.17092884814525144]                            
,                                                                                                                      
[16.20203133249948, 'True', 0.0589136450

[27.692540929011653, 'True', 0.05944236942780685, 0.040678685047720035, 0.17082756116122697]                           
,                                                                                                                      
[23.08878718258508, 'True', 0.05840139680791569, 0.039862142099681946, 0.17115529111529493]                            
,                                                                                                                      
[32.52118112996513, 'True', 0.05850889629610157, 0.04007423117709445, 0.1677415999078562]                              
,                                                                                                                      
[14.156173296210508, 'True', 0.060565534131963195, 0.04123011664899268, 0.18145359286742258]                           
,                                                                                                                      
[1.7326978790677856, 'True', 0.046273129

In [6]:
metric = pd.DataFrame(array)
metric.columns = ['neighbours','center','F1','Precision','Recall']
metric.head(5)

Unnamed: 0,neighbours,center,F1,Precision,Recall
0,4.73074,False,0.054703,0.037328,0.164216
1,12.259833,True,0.05904,0.040329,0.17352
2,12.568,True,0.058985,0.040361,0.171962
3,52.151909,True,0.057512,0.039374,0.166449
4,22.475242,False,0.059355,0.040562,0.173055


In [7]:
metric.sort_values(by=['F1'],ascending=False).head(10)

Unnamed: 0,neighbours,center,F1,Precision,Recall
43,39.095613,True,0.061291,0.041824,0.179067
46,23.811187,True,0.0607,0.041421,0.17644
81,19.502528,True,0.060633,0.041357,0.179098
71,14.156173,True,0.060566,0.04123,0.181454
79,29.058404,True,0.060473,0.041474,0.170902
31,21.718996,True,0.060438,0.041379,0.173564
82,18.924115,True,0.060408,0.041273,0.176777
35,32.76255,True,0.060401,0.041198,0.175944
6,44.931244,True,0.060392,0.041326,0.175787
64,48.974381,False,0.060311,0.041347,0.170991


In [8]:
metric.to_csv('metric_F1.csv', index=False)