<img src="https://www3.um.edu.uy/logoum.jpg" width=300>
<h1 align="center">Thesis - HPF F1 </h1> 
<h2 align="center">Alejo Paullier</h2> 

In [1]:
from lenskit import batch, topn, util
from lenskit import crossfold as xf
from lenskit.algorithms import Recommender
from lenskit.algorithms.hpf import HPF as hpfl
from hpfrec import HPF
import pandas as pd
import numpy as np
from hyperopt import fmin, tpe, hp, STATUS_OK
np.seterr(divide='ignore', invalid='ignore')

ratings = pd.read_csv('C:\\Users\\Alejo\\Tesis\\Demo\\ml-100k\\u.data', sep='\t',
                      names=['user', 'item', 'rating', 'timestamp'])


def eval(aname, algo, train, test):
    fittable = util.clone(algo) # Object cloning means to create an exact copy of the original object.
    fittable = Recommender.adapt(fittable) 
    fittable.fit(train) # Entrenamos el algoritmo con el training dataset
    users = test.user.unique() # Devuelve un array con los users unicos en el testing dataset
    # now we run the recommender
    recs = batch.recommend(fittable, users, 100)
    # add the algorithm name for analyzability
    recs['Algorithm'] = aname
    return recs

In [2]:
def model_trainer(features):
    all_recs = []
    test_data = []
    features=int(features)
    
    hpf = hpfl(features,verbose=False) # define algorithm
     
    for train, test in xf.partition_users(ratings[['user', 'item', 'rating']], 5, xf.SampleFrac(0.2)):
        test_data.append(test) # save testing data
        all_recs.append(eval('HPF', hpf, train, test))
    
    all_recs = pd.concat(all_recs, ignore_index=True)
    test_data = pd.concat(test_data, ignore_index=True)
    rla1 = topn.RecListAnalysis()
    rla2 = topn.RecListAnalysis()
    rla1.add_metric(topn.precision)
    rla2.add_metric(topn.recall)
    prec = rla1.compute(all_recs, test_data)
    rec = rla2.compute(all_recs, test_data)
    F1 = ((prec['precision'].values*rec['recall'].values*2)/(prec['precision'].values+rec['recall'].values))
    F1 = np.nan_to_num(F1)
    return [F1.mean(),prec['precision'].mean(),rec['recall'].mean()]

In [16]:
array=[]
def objective(params):
    features = params['features']
    metric = model_trainer(features)
    array.append([features,metric[0],metric[1],metric[2]])
    return {'loss': -metric[0], 'status': STATUS_OK }

space={'features': hp.uniform('features', 1, 100)}

best = fmin(objective, space, algo=tpe.suggest,max_evals=100)

**********************************                                                                                     
Hierarchical Poisson Factorization                                                                                     
**********************************                                                                                     
Number of users: 943                                                                                                   
Number of items: 1678                                                                                                  
Latent factors to use: 23                                                                                              
Initializing parameters...                                                                                             
Allocating Phi matrix...                                                                                               
Initializing optimization procedure...  

Final RMSE: 2.4052                                                                                                     
Minutes taken (optimization part): 0.1                                                                                 
**********************************                                                                                     
Hierarchical Poisson Factorization                                                                                     
**********************************                                                                                     
Number of users: 943                                                                                                   
Number of items: 1680                                                                                                  
Latent factors to use: 23                                                                                              
Initializing parameters...              

                                                                                                                       

Optimization finished
Final log-likelihood: -264660                                                                                          
Final RMSE: 2.3694                                                                                                     
Minutes taken (optimization part): 0.1                                                                                 
**********************************                                                                                     
Hierarchical Poisson Factorization                                                                                     
**********************************                                                                                     
Number of users: 943                                                                                                   
Number of items: 

Iteration 90 | train llk: -266672 | train rmse: 2.3737                                                                 
Iteration 100 | train llk: -265649 | train rmse: 2.3700                                                                
                                                                                                                       

Optimization finished
Final log-likelihood: -265649                                                                                          
Final RMSE: 2.3700                                                                                                     
Minutes taken (optimization part): 0.1                                                                                 
**********************************                                                                                     
Hierarchical Poisson Factorization                                                                                     
*****************

Iteration 70 | train llk: -253863 | train rmse: 2.3387                                                                 
Iteration 80 | train llk: -252393 | train rmse: 2.3340                                                                 
Iteration 90 | train llk: -251397 | train rmse: 2.3306                                                                 
Iteration 100 | train llk: -250518 | train rmse: 2.3272                                                                
                                                                                                                       

Optimization finished
Final log-likelihood: -250518                                                                                          
Final RMSE: 2.3272                                                                                                     
Minutes taken (optimization part): 0.2                                                                                 
*****************

Iteration 50 | train llk: -260922 | train rmse: 2.3596                                                                 
Iteration 60 | train llk: -257896 | train rmse: 2.3510                                                                 
Iteration 70 | train llk: -255610 | train rmse: 2.3447                                                                 
Iteration 80 | train llk: -253960 | train rmse: 2.3403                                                                 
Iteration 90 | train llk: -252824 | train rmse: 2.3371                                                                 
Iteration 100 | train llk: -251941 | train rmse: 2.3346                                                                
                                                                                                                       

Optimization finished
Final log-likelihood: -251941                                                                                          
Final RMSE: 2.334

Iteration 30 | train llk: -283440 | train rmse: 2.4283                                                                 
Iteration 40 | train llk: -274228 | train rmse: 2.3989                                                                 
Iteration 50 | train llk: -269491 | train rmse: 2.3836                                                                 
Iteration 60 | train llk: -266425 | train rmse: 2.3742                                                                 
Iteration 70 | train llk: -264576 | train rmse: 2.3682                                                                 
Iteration 80 | train llk: -263239 | train rmse: 2.3640                                                                 
Iteration 90 | train llk: -262191 | train rmse: 2.3610                                                                 
Iteration 100 | train llk: -261437 | train rmse: 2.3587                                                                
                                        

Iteration 10 | train llk: -338247 | train rmse: 2.5848                                                                 
Iteration 20 | train llk: -283691 | train rmse: 2.4331                                                                 
Iteration 30 | train llk: -265765 | train rmse: 2.3772                                                                 
Iteration 40 | train llk: -257561 | train rmse: 2.3497                                                                 
Iteration 50 | train llk: -252919 | train rmse: 2.3343                                                                 
Iteration 60 | train llk: -249808 | train rmse: 2.3238                                                                 
Iteration 70 | train llk: -247814 | train rmse: 2.3174                                                                 
Iteration 80 | train llk: -246359 | train rmse: 2.3129                                                                 
Iteration 90 | train llk: -245283 | trai

Allocating Phi matrix...                                                                                               
Initializing optimization procedure...                                                                                 
Iteration 10 | train llk: -338167 | train rmse: 2.5814                                                                 
Iteration 20 | train llk: -284237 | train rmse: 2.4376                                                                 
Iteration 30 | train llk: -266198 | train rmse: 2.3814                                                                 
Iteration 40 | train llk: -258070 | train rmse: 2.3540                                                                 
Iteration 50 | train llk: -253361 | train rmse: 2.3379                                                                 
Iteration 60 | train llk: -250276 | train rmse: 2.3278                                                                 
Iteration 70 | train llk: -248227 | trai

Latent factors to use: 13                                                                                              
Initializing parameters...                                                                                             
Allocating Phi matrix...                                                                                               
Initializing optimization procedure...                                                                                 
Iteration 10 | train llk: -407107 | train rmse: 2.7259                                                                 
Iteration 20 | train llk: -339877 | train rmse: 2.5800                                                                 
Iteration 30 | train llk: -322330 | train rmse: 2.5289                                                                 
Iteration 40 | train llk: -313898 | train rmse: 2.5025                                                                 
Iteration 50 | train llk: -309000 | trai

Number of users: 943                                                                                                   
Number of items: 1674                                                                                                  
Latent factors to use: 13                                                                                              
Initializing parameters...                                                                                             
Allocating Phi matrix...                                                                                               
Initializing optimization procedure...                                                                                 
Iteration 10 | train llk: -397481 | train rmse: 2.7096                                                                 
Iteration 20 | train llk: -332921 | train rmse: 2.5752                                                                 
Iteration 30 | train llk: -316979 | trai

Hierarchical Poisson Factorization                                                                                     
**********************************                                                                                     
Number of users: 943                                                                                                   
Number of items: 1675                                                                                                  
Latent factors to use: 15                                                                                              
Initializing parameters...                                                                                             
Allocating Phi matrix...                                                                                               
Initializing optimization procedure...                                                                                 
Iteration 10 | train llk: -406985 | trai

Minutes taken (optimization part): 0.1                                                                                 
**********************************                                                                                     
Hierarchical Poisson Factorization                                                                                     
**********************************                                                                                     
Number of users: 943                                                                                                   
Number of items: 1677                                                                                                  
Latent factors to use: 96                                                                                              
Initializing parameters...                                                                                             
Allocating Phi matrix...                

Final log-likelihood: -235259                                                                                          
Final RMSE: 2.2804                                                                                                     
Minutes taken (optimization part): 0.3                                                                                 
**********************************                                                                                     
Hierarchical Poisson Factorization                                                                                     
**********************************                                                                                     
Number of users: 943                                                                                                   
Number of items: 1675                                                                                                  
Latent factors to use: 96               

Iteration 100 | train llk: -243244 | train rmse: 2.3026                                                                
                                                                                                                       

Optimization finished
Final log-likelihood: -243244                                                                                          
Final RMSE: 2.3026                                                                                                     
Minutes taken (optimization part): 0.3                                                                                 
**********************************                                                                                     
Hierarchical Poisson Factorization                                                                                     
**********************************                                                                                     
Number of users: 

Iteration 80 | train llk: -243755 | train rmse: 2.3078                                                                 
Iteration 90 | train llk: -242451 | train rmse: 2.3031                                                                 
Iteration 100 | train llk: -241389 | train rmse: 2.2996                                                                
                                                                                                                       

Optimization finished
Final log-likelihood: -241389                                                                                          
Final RMSE: 2.2996                                                                                                     
Minutes taken (optimization part): 0.3                                                                                 
**********************************                                                                                     
Hierarchical Pois

Iteration 60 | train llk: -250933 | train rmse: 2.3289                                                                 
Iteration 70 | train llk: -248775 | train rmse: 2.3216                                                                 
Iteration 80 | train llk: -247244 | train rmse: 2.3161                                                                 
Iteration 90 | train llk: -246135 | train rmse: 2.3123                                                                 
Iteration 100 | train llk: -245168 | train rmse: 2.3092                                                                
                                                                                                                       

Optimization finished
Final log-likelihood: -245168                                                                                          
Final RMSE: 2.3092                                                                                                     
Minutes taken (op

Iteration 40 | train llk: -258950 | train rmse: 2.3602                                                                 
Iteration 50 | train llk: -254477 | train rmse: 2.3453                                                                 
Iteration 60 | train llk: -251610 | train rmse: 2.3364                                                                 
Iteration 70 | train llk: -249499 | train rmse: 2.3298                                                                 
Iteration 80 | train llk: -247810 | train rmse: 2.3239                                                                 
Iteration 90 | train llk: -246495 | train rmse: 2.3194                                                                 
Iteration 100 | train llk: -245399 | train rmse: 2.3158                                                                
                                                                                                                       

Optimization finished
Final log-likelih

Iteration 20 | train llk: -294278 | train rmse: 2.4688                                                                 
Iteration 30 | train llk: -275105 | train rmse: 2.4071                                                                 
Iteration 40 | train llk: -265227 | train rmse: 2.3740                                                                 
Iteration 50 | train llk: -259497 | train rmse: 2.3554                                                                 
Iteration 60 | train llk: -256077 | train rmse: 2.3449                                                                 
Iteration 70 | train llk: -253604 | train rmse: 2.3375                                                                 
Iteration 80 | train llk: -251761 | train rmse: 2.3321                                                                 
Iteration 90 | train llk: -250428 | train rmse: 2.3287                                                                 
Iteration 100 | train llk: -249221 | tra

Initializing optimization procedure...                                                                                 
Iteration 10 | train llk: -378216 | train rmse: 2.6712                                                                 
Iteration 20 | train llk: -311106 | train rmse: 2.5078                                                                 
Iteration 30 | train llk: -291868 | train rmse: 2.4433                                                                 
Iteration 40 | train llk: -282956 | train rmse: 2.4144                                                                 
Iteration 50 | train llk: -278092 | train rmse: 2.3997                                                                 
Iteration 60 | train llk: -275185 | train rmse: 2.3908                                                                 
Iteration 70 | train llk: -273340 | train rmse: 2.3854                                                                 
 11%|█████▏                             

KeyboardInterrupt: 

In [19]:
metric = pd.DataFrame(arra)
metric.columns = ['features','F1','Precision','Recall']
metric.head(5)

Unnamed: 0,features,F1,Precision,Recall
0,23.795394,0.186553,0.123468,0.665813
1,34.664361,0.188691,0.124825,0.673078
2,57.863228,0.185726,0.123033,0.65591
3,39.97636,0.187443,0.123955,0.667214
4,73.046172,0.183328,0.121188,0.655603


In [20]:
metric.sort_values(by=['F1'],ascending=False).head(10)

Unnamed: 0,features,F1,Precision,Recall
96,19.754314,0.191152,0.126384,0.681571
50,29.104089,0.190707,0.126405,0.676537
36,19.933295,0.190302,0.126013,0.675663
86,22.512658,0.190247,0.125864,0.679971
97,23.423897,0.190082,0.125822,0.675455
99,27.441202,0.189753,0.125673,0.671325
5,13.253687,0.189599,0.125376,0.677748
61,31.81137,0.189425,0.125313,0.677147
21,29.553828,0.189418,0.125355,0.676238
12,20.004908,0.189364,0.125323,0.677102


In [21]:
metric.shape

(100, 4)

In [22]:
metric.to_csv('metric_F1.csv', index=False)