<img src="https://www3.um.edu.uy/logoum.jpg" width=300>
<h1 align="center">Thesis - HPF nDCG</h1> 
<h2 align="center">Alejo Paullier</h2> 

https://lkpy.lenskit.org/en/stable/knn.html

In [1]:
from lenskit import batch, topn, util
from lenskit import crossfold as xf
from lenskit.algorithms import Recommender
from lenskit.algorithms.hpf import HPF as hpfl
from hpfrec import HPF
import pandas as pd
import numpy as np
from hyperopt import fmin, tpe, hp, STATUS_OK

ratings = pd.read_csv('D:\\Escritorio\\UM\\Tesis\\ML 1M\\ratings.dat', sep='::',engine='python',
                      names=['user', 'item', 'rating', 'timestamp'])


def eval(aname, algo, train, test):
    fittable = util.clone(algo) # Object cloning means to create an exact copy of the original object.
    fittable = Recommender.adapt(fittable) 
    fittable.fit(train) # Entrenamos el algoritmo con el training dataset
    users = test.user.unique() # Devuelve un array con los users unicos en el testing dataset
    # now we run the recommender
    recs = batch.recommend(fittable, users, 100)
    # add the algorithm name for analyzability
    recs['Algorithm'] = aname
    return recs

In [2]:
def model_trainer(features):
    all_recs = []
    test_data = []
    features=int(features)
    
    hpf = hpfl(features,maxiter=1) # define algorithm
    
    for train, test in xf.partition_users(ratings[['user', 'item', 'rating']], 5, xf.SampleFrac(0.2)):
        test_data.append(test) # save testing data
        all_recs.append(eval('HPF', hpf, train, test))
    all_recs = pd.concat(all_recs, ignore_index=True)
    test_data = pd.concat(test_data, ignore_index=True)
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(all_recs, test_data)
    return results["ndcg"].mean()

In [3]:
ndcg = model_trainer(200)

**********************************
Hierarchical Poisson Factorization
**********************************

Number of users: 6040
Number of items: 3701
Latent factors to use: 200

Initializing parameters...
Allocating Phi matrix...
Initializing optimization procedure...
Iteration 10 | train llk: -4455094 | train rmse: 2.8322
Iteration 20 | train llk: -3602046 | train rmse: 2.6264
Iteration 30 | train llk: -3289333 | train rmse: 2.5446
Iteration 40 | train llk: -3131203 | train rmse: 2.5020
Iteration 50 | train llk: -3038441 | train rmse: 2.4762
Iteration 60 | train llk: -2977287 | train rmse: 2.4590
Iteration 70 | train llk: -2933962 | train rmse: 2.4468
Iteration 80 | train llk: -2902689 | train rmse: 2.4379
Iteration 90 | train llk: -2877892 | train rmse: 2.4307
Iteration 100 | train llk: -2857787 | train rmse: 2.4250


Optimization finished
Final log-likelihood: -2857787
Final RMSE: 2.4250
Minutes taken (optimization part): 6.2

**********************************
Hierarchical Poisson 

In [4]:
ndcg

0.400328483416926

In [3]:
array = []
def objective(params):
    features = params['features']
    metric = model_trainer(features)
    array.append([features,metric])
    return {'loss': metric, 'status': STATUS_OK}

space={'features': hp.uniform('features', 1, 100)}

best = fmin(objective, space, algo=tpe.suggest,max_evals=100)

**********************************                                                                                     
Hierarchical Poisson Factorization                                                                                     
**********************************                                                                                     
Number of users: 6040                                                                                                  
Number of items: 3702                                                                                                  
Latent factors to use: 40                                                                                              
Initializing parameters...                                                                                             
Allocating Phi matrix...                                                                                               
Initializing optimization procedure...  

                                                                                                                       

Optimization finished
Final log-likelihood: -3161714                                                                                         
Final RMSE: 2.5055                                                                                                     
Minutes taken (optimization part): 5.8                                                                                 
**********************************                                                                                     
Hierarchical Poisson Factorization                                                                                     
**********************************                                                                                     
Number of users: 6040                                                                                                  
Number of items: 

Iteration 70 | train llk: -3870128 | train rmse: 2.6901                                                                
Iteration 80 | train llk: -3852866 | train rmse: 2.6861                                                                
Iteration 90 | train llk: -3841828 | train rmse: 2.6831                                                                
Iteration 100 | train llk: -3834405 | train rmse: 2.6809                                                               
                                                                                                                       

Optimization finished
Final log-likelihood: -3834405                                                                                         
Final RMSE: 2.6809                                                                                                     
Minutes taken (optimization part): 1.3                                                                                 
*****************

Iteration 30 | train llk: -3691670 | train rmse: 2.6434                                                                
Iteration 40 | train llk: -3548811 | train rmse: 2.6072                                                                
Iteration 50 | train llk: -3470331 | train rmse: 2.5865                                                                
Iteration 60 | train llk: -3421426 | train rmse: 2.5735                                                                
Iteration 70 | train llk: -3387105 | train rmse: 2.5641                                                                
Iteration 80 | train llk: -3361871 | train rmse: 2.5575                                                                
Iteration 90 | train llk: -3342857 | train rmse: 2.5526                                                                
Iteration 100 | train llk: -3328003 | train rmse: 2.5490                                                               
                                        

Allocating Phi matrix...                                                                                               
Initializing optimization procedure...                                                                                 
Iteration 10 | train llk: -4839002 | train rmse: 2.9022                                                                
Iteration 20 | train llk: -3961048 | train rmse: 2.7087                                                                
Iteration 30 | train llk: -3638082 | train rmse: 2.6295                                                                
Iteration 40 | train llk: -3468820 | train rmse: 2.5871                                                                
Iteration 50 | train llk: -3376737 | train rmse: 2.5637                                                                
Iteration 60 | train llk: -3323327 | train rmse: 2.5502                                                                
Iteration 70 | train llk: -3286633 | tra

KeyboardInterrupt: 

In [8]:
array # 540.47

[[46.03245754114246, 0.3889689891633376],
 [10.638205362284651, 0.3242665416003691],
 [53.99998708290729, 0.39409359092121626],
 [70.64404308778502, 0.39914125634610154],
 [42.3220360852923, 0.3894496964624833],
 [94.24384159908107, 0.4053535614799483],
 [42.78969149552001, 0.3909731220249033],
 [99.83634607152612, 0.40506217370524955],
 [31.55381584070685, 0.3804560076736961],
 [90.60578932277534, 0.405030784854978],
 [42.86920781727429, 0.3859073110379881],
 [22.22881238741727, 0.3651337685574952],
 [62.168657190044335, 0.40230097600281095],
 [26.10615448141448, 0.37083620322330774],
 [86.5450049222734, 0.405335558689985],
 [50.05890767927245, 0.3948007217359485],
 [56.89190840916548, 0.39671257520813136],
 [62.52961373641129, 0.39666424524404764],
 [19.88444706319474, 0.36129040099809345],
 [57.34440585148276, 0.39494607090002737]]

In [6]:
array

[[70.57168517715446, 0.39970063543638185]]

In [4]:
array

[[64.16889454623012, 0.40128780344780224],
 [57.9432052583412, 0.3956184796906879],
 [90.3320192250555, 0.39954674964673814],
 [69.67268612521052, 0.39875664200217353],
 [9.027747323342187, 0.31738036613793164],
 [18.028382891597815, 0.35850937418720363],
 [69.44005536792127, 0.4018362006229848],
 [52.87758366108038, 0.39366834428393666],
 [92.40707064999124, 0.40410231867781055],
 [11.2591372457911, 0.3311216468950095],
 [9.089088927352236, 0.32046343521049786],
 [83.96876967750167, 0.40130709898772415],
 [10.57577288641147, 0.32561677992444793],
 [41.58706333840771, 0.38802292895002577],
 [91.77589020398028, 0.3998755628015072],
 [38.60523065045376, 0.3854337723387175],
 [88.55171504482158, 0.4055972132138049],
 [76.44013700790613, 0.4024538395566857],
 [64.52718106786169, 0.4011382635537632]]

In [4]:
array

[[23.199433232913872, 0.3702609732139583],
 [15.41119422278794, 0.34903069262762987],
 [29.178634605686625, 0.377643458324074],
 [6.420676629189062, 0.29871506390069646],
 [73.97920145819117, 0.40314646494916945],
 [85.15688726276572, 0.4018351838969853],
 [30.354561228622963, 0.37842246121516276],
 [55.10890651657882, 0.39698036785898594],
 [74.90513076150964, 0.40509088382647185],
 [55.988963982923195, 0.3959840947095082],
 [36.706970489650324, 0.3846621587776013],
 [19.069935290569777, 0.35799848622696545],
 [24.775618894828735, 0.3668785164473996]]

In [10]:
array # 516.40

[[19.704117239799643, 0.3610241023377194],
 [95.07400409290368, 0.40618922533051943],
 [79.93429019092763, 0.3998328250324392],
 [95.12138581118117, 0.4023875405806301],
 [12.152156994183974, 0.33408961558144545],
 [86.92841182645392, 0.4020676949031186],
 [64.9713859274742, 0.3980582298509286],
 [5.448057288109046, 0.2927269228848858],
 [6.297932428418408, 0.2978160633999604],
 [19.872161142923648, 0.35939329608730775],
 [84.02980089766754, 0.39914730786346675],
 [63.264910313593894, 0.39667997599288746],
 [99.32608780339072, 0.40622923894312024],
 [97.0223746900284, 0.40244561174904875],
 [46.07069484780484, 0.38959197348799035],
 [47.23320893387159, 0.3907814023935565],
 [24.11297718195525, 0.36823093680049745],
 [85.33126620786123, 0.4024274496633911],
 [50.47595438880378, 0.39435269516553606],
 [30.301197454466998, 0.3770746869036446]]

In [8]:
array #539.82s/it

[[94.20872335554782, 0.40047300075806863],
 [64.7412917514767, 0.3998811259704453],
 [18.73846229979132, 0.35832816288436237],
 [91.85737323144325, 0.4070707538150645],
 [6.390789202244147, 0.2983549774542604]]

In [6]:
array 

[[79.93771987806369, 0.39745310384201926],
 [53.71705856029021, 0.3942975460086728],
 [13.689258828003538, 0.3390672561378367],
 [31.520315811228645, 0.3810059791939544],
 [67.8470392857221, 0.3967255617627476],
 [70.84886938087365, 0.4006417644929664],
 [36.49382150071445, 0.3852237697882613]]

In [4]:
array

[[40.85690846369436, 0.3860341795998637],
 [72.01980471903066, 0.39988301624393735],
 [15.116745037517475, 0.34667699959702164],
 [49.15257189552348, 0.392178888197196],
 [57.87045260349703, 0.392673832380934]]

In [4]:
array

[[9.110543216105077, 0.31580838988481713],
 [12.648920430733861, 0.33381201363439794],
 [27.98903560443836, 0.3699384140838549],
 [42.091341458276936, 0.39117137806575986],
 [83.79753566175134, 0.3985340997240504],
 [77.14079726542221, 0.4021184547328285],
 [16.820031635218605, 0.3538865224271037],
 [58.84061073500584, 0.39613952985869405],
 [29.38977030483631, 0.3767522455746386],
 [94.07226174613172, 0.4035357346774971]]

In [6]:
metric = pd.DataFrame(array)
metric.columns = ['features','nDCG']
metric.head()

Unnamed: 0,features,nDCG
0,9.110543,0.315808
1,12.64892,0.333812
2,27.989036,0.369938
3,42.091341,0.391171
4,83.797536,0.398534


In [7]:
metric.sort_values(by=['nDCG'],ascending=False).head(10)

Unnamed: 0,features,nDCG
20,91.857373,0.407071
34,99.326088,0.406229
23,95.074004,0.406189
71,88.551715,0.405597
80,94.243842,0.405354
89,86.545005,0.405336
50,74.905131,0.405091
82,99.836346,0.405062
84,90.605789,0.405031
63,92.407071,0.404102


In [8]:
metric.to_csv('metric_nDCG.csv', index=False)

In [5]:
array = [[9.110543216105077, 0.31580838988481713],
 [12.648920430733861, 0.33381201363439794],
 [27.98903560443836, 0.3699384140838549],
 [42.091341458276936, 0.39117137806575986],
 [83.79753566175134, 0.3985340997240504],
 [77.14079726542221, 0.4021184547328285],
 [16.820031635218605, 0.3538865224271037],
 [58.84061073500584, 0.39613952985869405],
 [29.38977030483631, 0.3767522455746386],
 [94.07226174613172, 0.4035357346774971],
 [79.93771987806369, 0.39745310384201926],
 [53.71705856029021, 0.3942975460086728],
 [13.689258828003538, 0.3390672561378367],
 [31.520315811228645, 0.3810059791939544],
 [67.8470392857221, 0.3967255617627476],
 [70.84886938087365, 0.4006417644929664],
 [36.49382150071445, 0.3852237697882613],
 [94.20872335554782, 0.40047300075806863],
 [64.7412917514767, 0.3998811259704453],
 [18.73846229979132, 0.35832816288436237],
 [91.85737323144325, 0.4070707538150645],
 [6.390789202244147, 0.2983549774542604],
 [19.704117239799643, 0.3610241023377194],
 [95.07400409290368, 0.40618922533051943],
 [79.93429019092763, 0.3998328250324392],
 [95.12138581118117, 0.4023875405806301],
 [12.152156994183974, 0.33408961558144545],
 [86.92841182645392, 0.4020676949031186],
 [64.9713859274742, 0.3980582298509286],
 [5.448057288109046, 0.2927269228848858],
 [6.297932428418408, 0.2978160633999604],
 [19.872161142923648, 0.35939329608730775],
 [84.02980089766754, 0.39914730786346675],
 [63.264910313593894, 0.39667997599288746],
 [99.32608780339072, 0.40622923894312024],
 [97.0223746900284, 0.40244561174904875],
 [46.07069484780484, 0.38959197348799035],
 [47.23320893387159, 0.3907814023935565],
 [24.11297718195525, 0.36823093680049745],
 [85.33126620786123, 0.4024274496633911],
 [50.47595438880378, 0.39435269516553606],
 [30.301197454466998, 0.3770746869036446],
 [23.199433232913872, 0.3702609732139583],
 [15.41119422278794, 0.34903069262762987],
 [29.178634605686625, 0.377643458324074],
 [6.420676629189062, 0.29871506390069646],
 [73.97920145819117, 0.40314646494916945],
 [85.15688726276572, 0.4018351838969853],
 [30.354561228622963, 0.37842246121516276],
 [55.10890651657882, 0.39698036785898594],
 [74.90513076150964, 0.40509088382647185],
 [55.988963982923195, 0.3959840947095082],
 [36.706970489650324, 0.3846621587776013],
 [19.069935290569777, 0.35799848622696545],
 [24.775618894828735, 0.3668785164473996],
 [64.16889454623012, 0.40128780344780224],
 [57.9432052583412, 0.3956184796906879],
 [90.3320192250555, 0.39954674964673814],
 [69.67268612521052, 0.39875664200217353],
 [9.027747323342187, 0.31738036613793164],
 [18.028382891597815, 0.35850937418720363],
 [69.44005536792127, 0.4018362006229848],
 [52.87758366108038, 0.39366834428393666],
 [92.40707064999124, 0.40410231867781055],
 [11.2591372457911, 0.3311216468950095],
 [9.089088927352236, 0.32046343521049786],
 [83.96876967750167, 0.40130709898772415],
 [10.57577288641147, 0.32561677992444793],
 [41.58706333840771, 0.38802292895002577],
 [91.77589020398028, 0.3998755628015072],
 [38.60523065045376, 0.3854337723387175],
 [88.55171504482158, 0.4055972132138049],
 [76.44013700790613, 0.4024538395566857],
 [64.52718106786169, 0.4011382635537632],
 [70.57168517715446, 0.39970063543638185],
 [46.03245754114246, 0.3889689891633376],
 [10.638205362284651, 0.3242665416003691],
 [53.99998708290729, 0.39409359092121626],
 [70.64404308778502, 0.39914125634610154],
 [42.3220360852923, 0.3894496964624833],
 [94.24384159908107, 0.4053535614799483],
 [42.78969149552001, 0.3909731220249033],
 [99.83634607152612, 0.40506217370524955],
 [31.55381584070685, 0.3804560076736961],
 [90.60578932277534, 0.405030784854978],
 [42.86920781727429, 0.3859073110379881],
 [22.22881238741727, 0.3651337685574952],
 [62.168657190044335, 0.40230097600281095],
 [26.10615448141448, 0.37083620322330774],
 [86.5450049222734, 0.405335558689985],
 [50.05890767927245, 0.3948007217359485],
 [56.89190840916548, 0.39671257520813136],
 [62.52961373641129, 0.39666424524404764],
 [19.88444706319474, 0.36129040099809345],
 [57.34440585148276, 0.39494607090002737],
 [40.85690846369436, 0.3860341795998637],
 [72.01980471903066, 0.39988301624393735],
 [15.116745037517475, 0.34667699959702164],
 [49.15257189552348, 0.392178888197196],
 [57.87045260349703, 0.392673832380934]]