<img src="https://www3.um.edu.uy/logoum.jpg" width=300>
<h1 align="center">Thesis - Bias MRR</h1> 
<h2 align="center">Alejo Paullier</h2> 

https://lkpy.lenskit.org/en/stable/knn.html

In [5]:
from lenskit import batch, topn, util
from lenskit import crossfold as xf
from lenskit.algorithms import Recommender
from lenskit.algorithms.basic import Bias
from lenskit import topn
import pandas as pd
import numpy as np

ratings = pd.read_csv('C:\\Users\\Alejo\\Tesis\\Demo\\ml-100k\\u.data', sep='\t',
                      names=['user', 'item', 'rating', 'timestamp'])


def eval(aname, algo, train, test):
    fittable = util.clone(algo) # Object cloning means to create an exact copy of the original object.
    fittable = Recommender.adapt(fittable) 
    fittable.fit(train) # Entrenamos el algoritmo con el training dataset
    users = test.user.unique() # Devuelve un array con los users unicos en el testing dataset
    # now we run the recommender
    recs = batch.recommend(fittable, users, 100)
    # add the algorithm name for analyzability
    recs['Algorithm'] = aname
    return recs

In [3]:
def model_trainer(item,user,damping):
    all_recs = []
    test_data = []
    
    bias = Bias(items=item, users=user, damping=damping) # define algorithm
    
    for train, test in xf.partition_users(ratings[['user', 'item', 'rating']], 5, xf.SampleFrac(0.2)):
        test_data.append(test) # save testing data
        all_recs.append(eval('Bias', bias, train, test))
    
    all_recs = pd.concat(all_recs, ignore_index=True)
    test_data = pd.concat(test_data, ignore_index=True)
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.recip_rank)
    results = rla.compute(all_recs, test_data)
                          
    return results["recip_rank"].mean()

In [7]:
from hyperopt import fmin, tpe, hp, STATUS_OK

def objective(params):
    user = params['user']
    item = params['item']
    damping = params['damping']
    metric = model_trainer(item,user,damping)
    print([item,user,damping,metric],',')
    return {'loss': -metric, 'status': STATUS_OK }

space={'item': hp.choice('item', ['True', 'False']),
       'user': hp.choice('user', ['True', 'False']),
       'damping': hp.uniform('damping', 0, 1)}

best = fmin(objective, space, algo=tpe.suggest,max_evals=100)

['True', 'False', 0.48753398689970673, 0.03561845989402783]                                                            
,                                                                                                                      
['True', 'True', 0.8532966039310298, 0.05533702753937216]                                                              
,                                                                                                                      
['False', 'True', 0.727875823155986, 0.048201706915034184]                                                             
,                                                                                                                      
['False', 'True', 0.26479131555589663, 0.03288442994064367]                                                            
,                                                                                                                      
['True', 'True', 0.4870269102400625, 0.0

['True', 'False', 0.40296950461094616, 0.034982564621170255]                                                           
,                                                                                                                      
['True', 'False', 0.7342194118787742, 0.05311479984770032]                                                             
,                                                                                                                      
['True', 'False', 0.8503965518123551, 0.05473551617575043]                                                             
,                                                                                                                      
['True', 'False', 0.16692279987205072, 0.03257460141490621]                                                            
,                                                                                                                      
['True', 'False', 0.6383085634280671, 0.

['False', 'True', 0.9096079838856113, 0.05564370392796105]                                                             
,                                                                                                                      
['False', 'True', 0.9318524147865024, 0.05579321548711162]                                                             
,                                                                                                                      
['False', 'True', 0.7668458461599196, 0.05152944022984899]                                                             
,                                                                                                                      
['False', 'True', 0.6875969306726997, 0.05142194526879818]                                                             
,                                                                                                                      
['False', 'True', 0.881630671100445, 0.0

In [14]:
metric = pd.DataFrame(array)
metric.columns = ['items','users','damping','MRR']
metric.head(5)

Unnamed: 0,items,users,damping,MRR
0,True,False,0.487534,0.035618
1,True,True,0.853297,0.055337
2,False,True,0.727876,0.048202
3,False,True,0.264791,0.032884
4,True,True,0.487027,0.036355


In [15]:
metric.sort_values(by=['MRR'],ascending=False).head(10)

Unnamed: 0,items,users,damping,MRR
96,True,True,0.941793,0.060358
30,True,False,0.998435,0.059589
67,False,True,0.908648,0.058621
23,True,False,0.987319,0.05854
51,False,True,0.951587,0.05849
74,False,True,0.815099,0.057624
5,True,True,0.887894,0.057265
46,False,False,0.992128,0.057133
29,True,True,0.90454,0.056816
65,False,True,0.946512,0.056694


In [16]:
metric.to_csv('metric_mrr.csv', index=False)