<img src="https://www3.um.edu.uy/logoum.jpg" width=300>
<h1 align="center">Thesis - HPF MRR</h1> 
<h2 align="center">Alejo Paullier</h2> 

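This notebook tunes the number of latent factors of LensKit's HPF (Hierarchical Poisson Factorization) recommender on the MovieLens 100K ratings, using hyperopt to maximize the mean reciprocal rank (MRR) of the top-100 recommendations.

LensKit documentation: https://lkpy.lenskit.org/en/stable/knn.html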

In [1]:
import os

import numpy as np
import pandas as pd
from hpfrec import HPF
from hyperopt import fmin, tpe, hp, STATUS_OK
from lenskit import batch, topn, util
from lenskit import crossfold as xf
from lenskit.algorithms import Recommender
from lenskit.algorithms.hpf import HPF as hpfl

# Location of the ratings file. Set the ML100K_PATH environment variable to
# point at another copy of `u.data`; when it is unset, the original local path
# is used, so existing setups keep working unchanged.
RATINGS_PATH = os.environ.get('ML100K_PATH',
                              'C:\\Users\\Alejo\\Tesis\\Demo\\ml-100k\\u.data')

# The file is read as tab-separated with no header row, so the column names
# are supplied explicitly.
ratings = pd.read_csv(RATINGS_PATH, sep='\t',
                      names=['user', 'item', 'rating', 'timestamp'])


def eval(aname, algo, train, test, n_recs=100):
    """Fit a copy of ``algo`` on ``train`` and recommend for the users in ``test``.

    NOTE: this function shadows the built-in ``eval``. The name is kept so that
    existing callers in this notebook continue to work.

    Parameters
    ----------
    aname : str
        Label written to the ``Algorithm`` column of the returned frame.
    algo :
        Algorithm to evaluate. It is cloned with ``util.clone`` and the clone
        is the object that gets fitted.
    train :
        Training data passed to ``fit``.
    test :
        Test data; only its ``user`` column is read, to decide who receives
        recommendations.
    n_recs : int, optional
        Number of recommendations requested per user. Defaults to 100, the
        value that was previously hard-coded.

    Returns
    -------
    The result of ``batch.recommend`` with an extra ``Algorithm`` column.
    """
    # Work on a clone so every call starts from a fresh copy of the algorithm.
    fittable = util.clone(algo)
    # Wrap the algorithm so it exposes the recommender interface.
    fittable = Recommender.adapt(fittable)
    # Train on the training split.
    fittable.fit(train)
    # Unique users present in the test split.
    users = test.user.unique()
    # Generate the recommendation lists for those users.
    recs = batch.recommend(fittable, users, n_recs)
    # Tag the rows with the algorithm name so results can be grouped later.
    recs['Algorithm'] = aname
    return recs

In [2]:
def model_trainer(features):
    """Cross-validate HPF with ``features`` latent factors and return its mean MRR.

    ``features`` is converted with ``int`` (so a float is truncated) and passed
    to the LensKit HPF wrapper. The global ``ratings`` frame is split with
    ``xf.partition_users`` into 5 partitions, using ``xf.SampleFrac(0.2)`` to
    select the test rows. Each split is evaluated with ``eval``, and the
    reciprocal rank of the pooled recommendations is averaged over all
    recommendation lists.
    """
    fold_recs = []
    fold_truth = []
    n_factors = int(features)

    # One algorithm template; `eval` is handed this same object for every fold.
    hpf_algo = hpfl(n_factors, verbose=False)

    splits = xf.partition_users(ratings[['user', 'item', 'rating']], 5, xf.SampleFrac(0.2))
    for train_part, test_part in splits:
        # Keep the held-out rows: they are the ground truth for the metric.
        fold_truth.append(test_part)
        fold_recs.append(eval('HPF', hpf_algo, train_part, test_part))

    pooled_recs = pd.concat(fold_recs, ignore_index=True)
    pooled_truth = pd.concat(fold_truth, ignore_index=True)

    analysis = topn.RecListAnalysis()
    analysis.add_metric(topn.recip_rank)
    per_list = analysis.compute(pooled_recs, pooled_truth)

    return per_list["recip_rank"].mean()

In [13]:
# Trial log: one [n_factors, MRR] pair per evaluation. Because it lives at
# module level, entries appended before an interruption of the search are
# still available afterwards.
array = []


def objective(params):
    """Hyperopt objective: train HPF for the sampled factor count and score it.

    Parameters
    ----------
    params : dict
        Sampled point of the search space; only ``params['features']`` is read.

    Returns
    -------
    dict
        ``{'loss': -MRR, 'status': STATUS_OK}``. The MRR is negated because
        hyperopt minimizes the loss.
    """
    # `model_trainer` converts its argument with int(); do the same here so
    # the logged value is the number of latent factors that was actually
    # trained, not the raw float drawn from the search space.
    features = int(params['features'])
    metric = model_trainer(features)
    array.append([features, metric])
    return {'loss': -metric, 'status': STATUS_OK}


# The number of latent factors is an integer, so sample on an integer grid
# (q=1) between 1 and 100 instead of on a continuous interval whose values
# would be truncated before training.
space = {'features': hp.quniform('features', 1, 100, 1)}

best = fmin(objective, space, algo=tpe.suggest, max_evals=100)

**********************************                                                                                     
Hierarchical Poisson Factorization                                                                                     
**********************************                                                                                     
Number of users: 943                                                                                                   
Number of items: 1680                                                                                                  
Latent factors to use: 79                                                                                              
Initializing parameters...                                                                                             
Allocating Phi matrix...                                                                                               
Initializing optimization procedure...  

Final RMSE: 2.3156                                                                                                     
Minutes taken (optimization part): 0.3                                                                                 
**********************************                                                                                     
Hierarchical Poisson Factorization                                                                                     
**********************************                                                                                     
Number of users: 943                                                                                                   
Number of items: 1677                                                                                                  
Latent factors to use: 79                                                                                              
Initializing parameters...              

                                                                                                                       

Optimization finished
Final log-likelihood: -256317                                                                                          
Final RMSE: 2.3432                                                                                                     
Minutes taken (optimization part): 0.2                                                                                 
**********************************                                                                                     
Hierarchical Poisson Factorization                                                                                     
**********************************                                                                                     
Number of users: 943                                                                                                   
Number of items: 

Iteration 90 | train llk: -261124 | train rmse: 2.3547                                                                 
Iteration 100 | train llk: -260287 | train rmse: 2.3524                                                                
                                                                                                                       

Optimization finished
Final log-likelihood: -260287                                                                                          
Final RMSE: 2.3524                                                                                                     
Minutes taken (optimization part): 0.2                                                                                 
**********************************                                                                                     
Hierarchical Poisson Factorization                                                                                     
*****************

Iteration 70 | train llk: -274545 | train rmse: 2.4012                                                                 
Iteration 80 | train llk: -272990 | train rmse: 2.3963                                                                 
Iteration 90 | train llk: -271943 | train rmse: 2.3930                                                                 
Iteration 100 | train llk: -271153 | train rmse: 2.3904                                                                
                                                                                                                       

Optimization finished
Final log-likelihood: -271153                                                                                          
Final RMSE: 2.3904                                                                                                     
Minutes taken (optimization part): 0.1                                                                                 
*****************

Iteration 50 | train llk: -280356 | train rmse: 2.4180                                                                 
Iteration 60 | train llk: -277346 | train rmse: 2.4082                                                                 
Iteration 70 | train llk: -275192 | train rmse: 2.4009                                                                 
Iteration 80 | train llk: -273474 | train rmse: 2.3956                                                                 
Iteration 90 | train llk: -272024 | train rmse: 2.3909                                                                 
Iteration 100 | train llk: -270894 | train rmse: 2.3875                                                                
                                                                                                                       

Optimization finished
Final log-likelihood: -270894                                                                                          
Final RMSE: 2.387

KeyboardInterrupt: 

In [16]:
# Hard-coded trial log: 100 [features, MRR] pairs.
# NOTE(review): presumably pasted from the `array` list filled by `objective`
# during an earlier hyperopt run — confirm provenance. Running this cell
# replaces whatever `array` held before.
array = [[77.55065396777678, 0.6320892829816492],
 [89.70366296995682, 0.6146566900995533],
 [31.639933646171322, 0.6544950447529269],
 [7.043305962495642, 0.5958213221321255],
 [24.41846172412374, 0.6304705253450288],
 [76.76481802959346, 0.6205194634864248],
 [38.906001399798186, 0.623299223867596],
 [27.09424989372053, 0.6271463707657792],
 [36.147677934505786, 0.6409786943460749],
 [97.17160355861982, 0.6227624639846036],
 [25.180457156488426, 0.6273036461154918],
 [92.78812802845108, 0.6292593217859433],
 [67.01749041243802, 0.6311504768469749],
 [20.65919018720629, 0.631319851065858],
 [82.16194110307818, 0.6027200909141324],
 [57.183766974024635, 0.6056956760971015],
 [17.41116327496534, 0.6422849445171228],
 [37.56829977982472, 0.6455651468353156],
 [59.7720382553142, 0.623245229196316],
 [55.98200796797128, 0.6221120444035015],
 [40.31000761986902, 0.6321505634316579],
 [7.1450480426885505, 0.6059793178536264],
 [42.304488285030295, 0.62654049607271],
 [47.396799920393605, 0.6375268513045612],
 [31.83359580821348, 0.6308129094329122],
 [50.58046224388475, 0.6307565275140682],
 [12.30076842652846, 0.6192892045448067],
 [68.58630121106822, 0.6277957236441857],
 [72.16559883604013, 0.6188705108370138],
 [23.277849658771917, 0.6383650635267996],
 [36.637538366201525, 0.6371401116771256],
 [39.767433754176054, 0.64594980577962],
 [65.19132413667043, 0.6240352996950216],
 [50.55268199689872, 0.6221583881348258],
 [76.32762502732623, 0.6192742486935091],
 [5.038368542461655, 0.614617453584875],
 [68.05689240865846, 0.6136959402130484],
 [27.162682694322985, 0.6404076970406773],
 [10.692216769675317, 0.6320270947598469],
 [75.76730092928894, 0.5948785023446462],
 [24.439846619849266, 0.6480204425025513],
 [59.604043870077874, 0.6101121987483699],
 [20.326527331926698, 0.6444938551676446],
 [66.5934188111337, 0.6198210272860553],
 [62.13597550996691, 0.6171269349774073],
 [78.84331805986507, 0.617825260479445],
 [38.77046761160387, 0.6312140846815908],
 [30.34425694173914, 0.6355506804424967],
 [14.043402831240051, 0.6454995140773434],
 [45.629150898107014, 0.6358097559768161],
 [85.24149263290262, 0.6168280324936837],
 [87.69923378708674, 0.6284189421674122],
 [71.27847593985942, 0.6177545391841074],
 [25.33522684632587, 0.6471198297733625],
 [21.47796307972385, 0.6450594877427547],
 [57.46823449832021, 0.6247185930523442],
 [11.484677240148107, 0.6034534519078911],
 [14.119710037354547, 0.6236733620199875],
 [90.48099653877794, 0.6090952243385073],
 [29.399502476278325, 0.6215113516814363],
 [12.129253235109275, 0.6447223245835411],
 [64.46080361403725, 0.5929009956605883],
 [30.774196905630085, 0.6303396351094798],
 [11.09214389344497, 0.6321786805415233],
 [78.84697870268235, 0.6145527124320642],
 [33.97772534368783, 0.6320702095720839],
 [77.21460416940018, 0.6251308878772128],
 [33.09577123018296, 0.6497419434149598],
 [75.7548104408701, 0.6150067375930129],
 [43.87491423384899, 0.6347928171275253],
 [10.126712078623463, 0.6163926151201515],
 [73.13222934491081, 0.6228855179798685],
 [13.298636964146718, 0.6319929658477089],
 [5.745541472548556, 0.5798983268186692],
 [51.07753369783381, 0.6310732582364519],
 [48.27039823204833, 0.608110557734599],
 [70.67036202963719, 0.6084508466852947],
 [11.911012833737168, 0.6171430064245845],
 [23.27205318710343, 0.6480115469432688],
 [74.65148756171158, 0.6063280883629821],
 [46.03975585418294, 0.6385532877511736],
 [88.73063334518115, 0.6157987552702076],
 [15.112014259630255, 0.632248639336992],
 [70.54701162674672, 0.6425789357114744],
 [66.62072575726674, 0.6220946065606155],
 [75.30990062793798, 0.6221431677132968],
 [45.764093483677726, 0.6093839039129627],
 [14.745069708180687, 0.6328860524732964],
 [37.74487976038759, 0.639881972005122],
 [62.92273159069691, 0.6317943579013554],
 [27.99221881307002, 0.6399269316884199],
 [97.87903638080532, 0.5967408998628895],
 [25.463424755488077, 0.6377285977028051],
 [57.83880021980526, 0.6297172460089651],
 [81.01585364947968, 0.6198450193944232],
 [29.61304667313932, 0.6563550659912724],
 [31.447579789710964, 0.632323242863966],
 [79.51186669719826, 0.6035403387860329],
 [46.89432075329132, 0.6414333683023186],
 [29.689344432140643, 0.6381816152027437]]

In [17]:
# Tabulate the trial log, one row per evaluation. The column labels are given
# at construction time, so the frame is correctly labelled even when `array`
# is empty (assigning `.columns` afterwards raises in that case).
metric = pd.DataFrame(array, columns=['features', 'MRR'])
metric.head()

Unnamed: 0,features,MRR
0,77.550654,0.632089
1,89.703663,0.614657
2,31.639934,0.654495
3,7.043306,0.595821
4,24.418462,0.630471


In [19]:
# Show the ten trials with the highest MRR, best first.
metric.sort_values('MRR', ascending=False).iloc[:10]

Unnamed: 0,features,MRR
95,29.613047,0.656355
2,31.639934,0.654495
67,33.095771,0.649742
40,24.439847,0.64802
78,23.272053,0.648012
53,25.335227,0.64712
31,39.767434,0.64595
17,37.5683,0.645565
48,14.043403,0.6455
54,21.477963,0.645059


In [20]:
# Save the trial table as CSV in the working directory, without the row index.
metric.to_csv(path_or_buf='metric_MRR.csv', index=False)