<img src="https://www3.um.edu.uy/logoum.jpg" width=300>
<h1 align="center">Thesis - HPF MRR</h1> 
<h2 align="center">Alejo Paullier</h2> 

https://lkpy.lenskit.org/en/stable/knn.html

In [1]:
from lenskit import batch, topn, util
from lenskit import crossfold as xf
from lenskit.algorithms import Recommender
from lenskit.algorithms.hpf import HPF as hpfl
from hpfrec import HPF
import pandas as pd
import numpy as np
from hyperopt import fmin, tpe, hp, STATUS_OK

ratings = pd.read_csv('D:\\Escritorio\\UM\\Tesis\\ML 1M\\ratings.dat', sep='::',engine='python',
                      names=['user', 'item', 'rating', 'timestamp'])


def eval(aname, algo, train, test):
    fittable = util.clone(algo) # Object cloning means to create an exact copy of the original object.
    fittable = Recommender.adapt(fittable) 
    fittable.fit(train) # Entrenamos el algoritmo con el training dataset
    users = test.user.unique() # Devuelve un array con los users unicos en el testing dataset
    # now we run the recommender
    recs = batch.recommend(fittable, users, 100)
    # add the algorithm name for analyzability
    recs['Algorithm'] = aname
    return recs

In [2]:
def model_trainer(features):
    all_recs = []
    test_data = []
    features=int(features)
    
    hpf = hpfl(features,verbose=False) # define algorithm
    
    for train, test in xf.partition_users(ratings[['user', 'item', 'rating']], 5, xf.SampleFrac(0.2)):
        test_data.append(test) # save testing data
        all_recs.append(eval('HPF', hpf, train, test))
    all_recs = pd.concat(all_recs, ignore_index=True)
    test_data = pd.concat(test_data, ignore_index=True)
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.recip_rank)
    results = rla.compute(all_recs, test_data)
                          
    return results["recip_rank"].mean()

In [5]:
array = []
def objective(params):
    features = params['features']
    metric = model_trainer(features)
    array.append([features,metric])
    return {'loss': -metric, 'status': STATUS_OK}

space={'features': hp.uniform('features', 1, 100)}

best = fmin(objective, space, algo=tpe.suggest,max_evals=100)

**********************************                                                                                     
Hierarchical Poisson Factorization                                                                                     
**********************************                                                                                     
Number of users: 6040                                                                                                  
Number of items: 3706                                                                                                  
Latent factors to use: 65                                                                                              
Initializing parameters...                                                                                             
Allocating Phi matrix...                                                                                               
Initializing optimization procedure...  

Final RMSE: 2.5180                                                                                                     
Minutes taken (optimization part): 2.0                                                                                 
**********************************                                                                                     
Hierarchical Poisson Factorization                                                                                     
**********************************                                                                                     
Number of users: 6040                                                                                                  
Number of items: 3703                                                                                                  
Latent factors to use: 65                                                                                              
Initializing parameters...              

                                                                                                                       

Optimization finished
Final log-likelihood: -3447563                                                                                         
Final RMSE: 2.5788                                                                                                     
Minutes taken (optimization part): 1.1                                                                                 
**********************************                                                                                     
Hierarchical Poisson Factorization                                                                                     
**********************************                                                                                     
Number of users: 6040                                                                                                  
Number of items: 

Iteration 90 | train llk: -3475418 | train rmse: 2.5880                                                                
Iteration 100 | train llk: -3461281 | train rmse: 2.5847                                                               
                                                                                                                       

Optimization finished
Final log-likelihood: -3461281                                                                                         
Final RMSE: 2.5847                                                                                                     
Minutes taken (optimization part): 1.1                                                                                 
**********************************                                                                                     
Hierarchical Poisson Factorization                                                                                     
*****************

Iteration 70 | train llk: -3291626 | train rmse: 2.5413                                                                
Iteration 80 | train llk: -3258887 | train rmse: 2.5329                                                                
Iteration 90 | train llk: -3235154 | train rmse: 2.5269                                                                
Iteration 100 | train llk: -3217480 | train rmse: 2.5225                                                               
                                                                                                                       

Optimization finished
Final log-likelihood: -3217480                                                                                         
Final RMSE: 2.5225                                                                                                     
Minutes taken (optimization part): 1.9                                                                                 
*****************

Iteration 50 | train llk: -3377839 | train rmse: 2.5616                                                                
Iteration 60 | train llk: -3320616 | train rmse: 2.5465                                                                
Iteration 70 | train llk: -3283759 | train rmse: 2.5368                                                                
Iteration 80 | train llk: -3257351 | train rmse: 2.5296                                                                
Iteration 90 | train llk: -3238289 | train rmse: 2.5245                                                                
Iteration 100 | train llk: -3223831 | train rmse: 2.5208                                                               
                                                                                                                       

Optimization finished
Final log-likelihood: -3223831                                                                                         
Final RMSE: 2.520

Iteration 30 | train llk: -3456110 | train rmse: 2.5860                                                                
Iteration 40 | train llk: -3301899 | train rmse: 2.5441                                                                
Iteration 50 | train llk: -3214496 | train rmse: 2.5210                                                                
Iteration 60 | train llk: -3156483 | train rmse: 2.5060                                                                
Iteration 70 | train llk: -3116466 | train rmse: 2.4954                                                                
Iteration 80 | train llk: -3087304 | train rmse: 2.4880                                                                
Iteration 90 | train llk: -3064897 | train rmse: 2.4821                                                                
Iteration 100 | train llk: -3046350 | train rmse: 2.4774                                                               
                                        

Iteration 10 | train llk: -5084383 | train rmse: 2.9542                                                                
Iteration 20 | train llk: -4304471 | train rmse: 2.7930                                                                
Iteration 30 | train llk: -3964596 | train rmse: 2.7164                                                                
Iteration 40 | train llk: -3787247 | train rmse: 2.6728                                                                
Iteration 50 | train llk: -3696453 | train rmse: 2.6491                                                                
Iteration 60 | train llk: -3646924 | train rmse: 2.6350                                                                
Iteration 70 | train llk: -3616032 | train rmse: 2.6260                                                                
Iteration 80 | train llk: -3596573 | train rmse: 2.6204                                                                
Iteration 90 | train llk: -3582010 | tra

Allocating Phi matrix...                                                                                               
Initializing optimization procedure...                                                                                 
Iteration 10 | train llk: -5083181 | train rmse: 2.9507                                                                
Iteration 20 | train llk: -4287302 | train rmse: 2.7782                                                                
Iteration 30 | train llk: -3931137 | train rmse: 2.6954                                                                
Iteration 40 | train llk: -3779188 | train rmse: 2.6598                                                                
Iteration 50 | train llk: -3703583 | train rmse: 2.6413                                                                
Iteration 60 | train llk: -3660302 | train rmse: 2.6302                                                                
Iteration 70 | train llk: -3631844 | tra

Latent factors to use: 70                                                                                              
Initializing parameters...                                                                                             
Allocating Phi matrix...                                                                                               
Initializing optimization procedure...                                                                                 
Iteration 10 | train llk: -4752343 | train rmse: 2.8940                                                                
Iteration 20 | train llk: -3892430 | train rmse: 2.6946                                                                
Iteration 30 | train llk: -3555066 | train rmse: 2.6095                                                                
Iteration 40 | train llk: -3386259 | train rmse: 2.5652                                                                
Iteration 50 | train llk: -3298969 | tra

Number of users: 6040                                                                                                  
Number of items: 3701                                                                                                  
Latent factors to use: 70                                                                                              
Initializing parameters...                                                                                             
Allocating Phi matrix...                                                                                               
Initializing optimization procedure...                                                                                 
Iteration 10 | train llk: -4786811 | train rmse: 2.8947                                                                
Iteration 20 | train llk: -3894174 | train rmse: 2.6889                                                                
Iteration 30 | train llk: -3559414 | tra

Hierarchical Poisson Factorization                                                                                     
**********************************                                                                                     
Number of users: 6040                                                                                                  
Number of items: 3703                                                                                                  
Latent factors to use: 34                                                                                              
Initializing parameters...                                                                                             
Allocating Phi matrix...                                                                                               
Initializing optimization procedure...                                                                                 
Iteration 10 | train llk: -5030310 | tra

Minutes taken (optimization part): 1.1                                                                                 
**********************************                                                                                     
Hierarchical Poisson Factorization                                                                                     
**********************************                                                                                     
Number of users: 6040                                                                                                  
Number of items: 3705                                                                                                  
Latent factors to use: 87                                                                                              
Initializing parameters...                                                                                             
Allocating Phi matrix...                

KeyboardInterrupt: 

In [6]:
array 

[[65.59430402002553, 0.6141355882517721],
 [34.12906552345305, 0.5942292687053243],
 [60.349326395230484, 0.6067448858792008],
 [97.56256911880097, 0.6181423222346267],
 [26.572702588363036, 0.584850221834553],
 [70.69014874935483, 0.6038840439879216],
 [34.76581675485538, 0.587663886657299]]

In [4]:
array #484.24

[[97.20502597497644, 0.6076432874526915],
 [49.77491683745057, 0.6003317370956137],
 [14.017308798095296, 0.5560618417634258],
 [79.93117694500465, 0.6135774174143143],
 [98.22129176475292, 0.6141999733393998],
 [18.112306335835463, 0.571593586672919],
 [73.16369359072172, 0.6117045031040764],
 [8.241546552145927, 0.5260145461351127],
 [46.54122914922052, 0.5966631298786769]]

In [6]:
array #732.87

[[39.73032878212709, 0.6021935414690187],
 [10.595937520042163, 0.5367866752050835],
 [51.529047710234636, 0.6035835489022048],
 [51.0795273032218, 0.6087265520905637],
 [86.85512109914396, 0.6090624351727835],
 [68.94130191450179, 0.6101551767152814],
 [32.75762943024612, 0.5886479338339674],
 [79.80995639325258, 0.6107819498076983],
 [83.7469480939087, 0.6061108201435873],
 [10.767963120053022, 0.5315013508031983],
 [89.53585490883046, 0.617020804030599],
 [68.95447191390741, 0.6085291052209971],
 [8.667761651351372, 0.5258547916199315],
 [20.73425821005075, 0.5671540367513078],
 [24.95038680981864, 0.585025787249171],
 [70.88110782446222, 0.6136522610412471],
 [34.95412897557501, 0.593535977839417],
 [71.87090221532823, 0.6113372450369973],
 [69.17335520849385, 0.6145904240960516],
 [79.92649097734815, 0.6097608032788292],
 [95.7012762576808, 0.6145235290775581],
 [98.60446576842725, 0.6088260679782895],
 [60.54773742163389, 0.6043799913306916],
 [91.61201745745771, 0.61259581002095

In [4]:
array

[[60.396309920250815, 0.5995405053376486],
 [75.48112924297375, 0.6076750681683537],
 [59.27393710759018, 0.6079650121702811],
 [94.14622055570538, 0.6141745984613832]]

In [4]:
array # 709.57

[[36.94319475826971, 0.5987794985039477],
 [46.14999876351392, 0.6001882477988422],
 [48.46864754388819, 0.5981813506557064],
 [44.574558238933854, 0.6070014470159139],
 [38.41508257295905, 0.5945904953860542],
 [8.237793742837738, 0.5350987601233624],
 [40.569165544511044, 0.5994683680301085],
 [48.75495737438963, 0.6057830337276054],
 [62.38863362448205, 0.6099397091813512],
 [62.52527578731046, 0.6140067161580154],
 [96.3684354170361, 0.6090328668215866],
 [11.533082494757622, 0.5457012319478701],
 [5.876306996549661, 0.5090757726601141],
 [15.961458942653149, 0.5594327208331493],
 [43.968990842777174, 0.605954517559999],
 [31.454806772387524, 0.5867474442701333],
 [61.48973920775618, 0.6109186368022065],
 [81.04497180613224, 0.6174102776610534],
 [34.918812944575365, 0.5966486631736432],
 [99.59262139495203, 0.6142646106907486],
 [99.44744717286511, 0.6091142506073596],
 [83.36334802448715, 0.6126112527420333],
 [85.46183609001923, 0.6200680921383169],
 [82.7707949017874, 0.6096782

In [6]:
array #1389.55

[[60.927397396417106, 0.6066224893392048],
 [31.30485481450014, 0.5976313423438571],
 [93.53664162959849, 0.6114894445889173],
 [31.280617114272843, 0.5890119074233477],
 [86.37863130038224, 0.6132691342646599],
 [50.365420190950296, 0.604650528010384],
 [18.780911912845525, 0.5646430704718374],
 [57.41887407949496, 0.6069432237929341],
 [71.31712654214405, 0.6152305380176268],
 [43.68365842446312, 0.6021641154164694],
 [83.97796546759054, 0.6125875589763022],
 [42.13227743112521, 0.6024282517747098]]

In [4]:
array #940

[[7.020515337595652, 0.5147708462632566],
 [46.36974821499358, 0.6044926386445703],
 [5.260399882376543, 0.49471010843303015],
 [89.28853791025723, 0.6186136325846924]]

In [8]:
metric = pd.DataFrame(array)
metric.columns = ['features','MRR']
metric.head()

Unnamed: 0,features,MRR
0,7.020515,0.514771
1,46.369748,0.604493
2,5.2604,0.49471
3,89.288538,0.618614
4,60.927397,0.606622


In [9]:
metric.sort_values(by=['MRR'],ascending=False).head(10)

Unnamed: 0,features,MRR
56,87.279545,0.626772
43,71.085249,0.620264
38,85.461836,0.620068
42,70.650903,0.619841
3,89.288538,0.618614
33,81.044972,0.61741
61,78.202564,0.617154
76,89.535855,0.617021
55,76.229997,0.616299
41,75.992887,0.615888


In [10]:
metric.to_csv('metric_MRR.csv', index=False)

In [7]:
array = [[7.020515337595652, 0.5147708462632566],
 [46.36974821499358, 0.6044926386445703],
 [5.260399882376543, 0.49471010843303015],
 [89.28853791025723, 0.6186136325846924],
 [60.927397396417106, 0.6066224893392048],
 [31.30485481450014, 0.5976313423438571],
 [93.53664162959849, 0.6114894445889173],
 [31.280617114272843, 0.5890119074233477],
 [86.37863130038224, 0.6132691342646599],
 [50.365420190950296, 0.604650528010384],
 [18.780911912845525, 0.5646430704718374],
 [57.41887407949496, 0.6069432237929341],
 [71.31712654214405, 0.6152305380176268],
 [43.68365842446312, 0.6021641154164694],
 [83.97796546759054, 0.6125875589763022],
 [42.13227743112521, 0.6024282517747098],
 [36.94319475826971, 0.5987794985039477],
 [46.14999876351392, 0.6001882477988422],
 [48.46864754388819, 0.5981813506557064],
 [44.574558238933854, 0.6070014470159139],
 [38.41508257295905, 0.5945904953860542],
 [8.237793742837738, 0.5350987601233624],
 [40.569165544511044, 0.5994683680301085],
 [48.75495737438963, 0.6057830337276054],
 [62.38863362448205, 0.6099397091813512],
 [62.52527578731046, 0.6140067161580154],
 [96.3684354170361, 0.6090328668215866],
 [11.533082494757622, 0.5457012319478701],
 [5.876306996549661, 0.5090757726601141],
 [15.961458942653149, 0.5594327208331493],
 [43.968990842777174, 0.605954517559999],
 [31.454806772387524, 0.5867474442701333],
 [61.48973920775618, 0.6109186368022065],
 [81.04497180613224, 0.6174102776610534],
 [34.918812944575365, 0.5966486631736432],
 [99.59262139495203, 0.6142646106907486],
 [99.44744717286511, 0.6091142506073596],
 [83.36334802448715, 0.6126112527420333],
 [85.46183609001923, 0.6200680921383169],
 [82.7707949017874, 0.6096782353071498],
 [83.93690859425682, 0.6147986539716936],
 [75.99288686911567, 0.6158884287442824],
 [70.65090310554237, 0.6198405084276105],
 [71.0852487194779, 0.6202637070178311],
 [92.65609612347822, 0.6078379348839654],
 [90.20453829579586, 0.6112942438694994],
 [56.96511646268218, 0.6099533613659851],
 [69.81558610378244, 0.6156886826052338],
 [55.047565575249436, 0.608873949489404],
 [71.11093674355824, 0.6133733204523774],
 [29.051651588467166, 0.5942962639369752],
 [90.56673636874481, 0.6033450604295965],
 [68.12124101598111, 0.6109257780126568],
 [75.00752191574774, 0.6146476203276319],
 [54.917428966155896, 0.6087724949404766],
 [76.22999684774362, 0.6162989225700444],
 [87.27954467339495, 0.6267724833309147],
 [23.45328089796067, 0.5841440644369714],
 [95.79854619258785, 0.61143285796236],
 [65.74262131666359, 0.6132793207364869],
 [51.28579820908615, 0.597262480340034],
 [78.20256351173192, 0.6171538131959077],
 [60.396309920250815, 0.5995405053376486],
 [75.48112924297375, 0.6076750681683537],
 [59.27393710759018, 0.6079650121702811],
 [94.14622055570538, 0.6141745984613832],
 [39.73032878212709, 0.6021935414690187],
 [10.595937520042163, 0.5367866752050835],
 [51.529047710234636, 0.6035835489022048],
 [51.0795273032218, 0.6087265520905637],
 [86.85512109914396, 0.6090624351727835],
 [68.94130191450179, 0.6101551767152814],
 [32.75762943024612, 0.5886479338339674],
 [79.80995639325258, 0.6107819498076983],
 [83.7469480939087, 0.6061108201435873],
 [10.767963120053022, 0.5315013508031983],
 [89.53585490883046, 0.617020804030599],
 [68.95447191390741, 0.6085291052209971],
 [8.667761651351372, 0.5258547916199315],
 [20.73425821005075, 0.5671540367513078],
 [24.95038680981864, 0.585025787249171],
 [70.88110782446222, 0.6136522610412471],
 [34.95412897557501, 0.593535977839417],
 [71.87090221532823, 0.6113372450369973],
 [69.17335520849385, 0.6145904240960516],
 [79.92649097734815, 0.6097608032788292],
 [95.7012762576808, 0.6145235290775581],
 [98.60446576842725, 0.6088260679782895],
 [60.54773742163389, 0.6043799913306916],
 [91.61201745745771, 0.6125958100209551],
 [61.069718619876504, 0.6096120771333479],
 [97.20502597497644, 0.6076432874526915],
 [49.77491683745057, 0.6003317370956137],
 [14.017308798095296, 0.5560618417634258],
 [79.93117694500465, 0.6135774174143143],
 [98.22129176475292, 0.6141999733393998],
 [18.112306335835463, 0.571593586672919],
 [73.16369359072172, 0.6117045031040764],
 [8.241546552145927, 0.5260145461351127],
 [46.54122914922052, 0.5966631298786769]]