<img src="https://www3.um.edu.uy/logoum.jpg" width=300>
<h1 align="center">Thesis - Bias RMSE</h1> 
<h2 align="center">Alejo Paullier</h2> 

https://lkpy.lenskit.org/en/stable/knn.html

In [1]:
from lenskit import batch, topn, util
from lenskit import crossfold as xf
from lenskit.algorithms import Recommender
from lenskit.algorithms.basic import Bias
from lenskit.metrics.predict import rmse
import pandas as pd
import numpy as np

ratings = pd.read_csv('C:\\Users\\Alejo\\Tesis\\Demo\\ml-100k\\u.data', sep='\t',
                      names=['user', 'item', 'rating', 'timestamp'])


def eval(aname, algo, train, test):
    fittable = util.clone(algo) # Object cloning means to create an exact copy of the original object.
    fittable = Recommender.adapt(fittable) 
    fittable.fit(train) # train the algorithm with the training dataset
    # Now we run the recommender:
    preds = batch.predict(fittable, test)
    # add the algorithm name for analyzability
    return preds

In [5]:
def model_trainer(item,user,damping):
    all_recs = []
    test_data = []
    bias = Bias(items=item, users=user, damping=damping) # define algorithm
    
    for train, test in xf.partition_users(ratings[['user', 'item', 'rating']], 5, xf.SampleFrac(0.2)):
        test_data.append(test) # save testing data
        all_recs.append(eval('Bias', bias, train, test))
    
    all_recs = pd.concat(all_recs, ignore_index=True)

    metric = rmse(all_recs['prediction'], all_recs['rating'])
    
    return metric

In [6]:
from hyperopt import fmin, tpe, hp, STATUS_OK

def objective(params):
    user = params['user']
    item = params['item']
    damping = params['damping']
    metric = model_trainer(item,user,damping)
    print([item,user,damping,metric],',')
    return {'loss': metric, 'status': STATUS_OK }

space={'item': hp.choice('item', ['True', 'False']),
       'user': hp.choice('user', ['True', 'False']),
       'damping': hp.uniform('damping', 0, 1)}

best = fmin(objective, space, algo=tpe.suggest,max_evals=100)

['True', 'True', 0.2969171230936699, 0.9607172741264611]                                                               
,                                                                                                                      
['False', 'False', 0.8474591347169902, 0.9165906323676718]                                                             
,                                                                                                                      
['True', 'False', 0.9650826517341052, 0.9569927610721357]                                                              
,                                                                                                                      
['True', 'False', 0.14268535380633807, 0.959216767445689]                                                              
,                                                                                                                      
['False', 'True', 0.215195782036797, 0.9

['False', 'False', 0.2736585820955255, 0.9322075351292441]                                                             
,                                                                                                                      
['False', 'False', 0.33087397885511205, 0.9573524210173106]                                                            
,                                                                                                                      
['False', 'False', 0.054669229586821466, 0.9575936020037344]                                                           
,                                                                                                                      
['False', 'False', 0.4288234724728911, 0.9430696484627581]                                                             
,                                                                                                                      
['False', 'False', 0.18824691504184826, 

['True', 'True', 0.9359690473345821, 0.9600351665293769]                                                               
,                                                                                                                      
['True', 'True', 0.8365213324619818, 0.949244349328446]                                                                
,                                                                                                                      
['True', 'True', 0.8959603785488413, 0.9520853115660245]                                                               
,                                                                                                                      
['True', 'True', 0.3527017194447868, 0.961624390917959]                                                                
,                                                                                                                      
['True', 'True', 0.5399787395831718, 0.9

In [8]:
metric = pd.DataFrame(array)
metric.columns = ['items','users','damping','RMSE']
metric

Unnamed: 0,items,users,damping,RMSE
0,True,True,0.296917,0.960717
1,False,False,0.847459,0.916591
2,True,False,0.965083,0.956993
3,True,False,0.142685,0.959217
4,False,True,0.215196,0.954337
5,True,True,0.083250,0.938482
6,True,True,0.917356,0.930899
7,False,False,0.148077,0.954934
8,False,True,0.490725,0.974188
9,False,False,0.105512,0.931604


In [9]:
metric.sort_values(by=['RMSE'],ascending=True).head(10)

Unnamed: 0,items,users,damping,RMSE
55,True,False,0.998519,0.864112
27,False,False,0.294508,0.883646
64,True,True,0.607583,0.885283
82,True,False,0.321323,0.8896
47,False,False,0.106046,0.893487
93,True,False,0.871882,0.895405
81,True,False,0.223261,0.896817
32,False,False,0.243877,0.897409
60,True,False,0.637484,0.898031
65,True,True,0.665613,0.900994
