<img src="https://www3.um.edu.uy/logoum.jpg" width=300>
<h1 align="center">Thesis - HPF MAE</h1> 
<h2 align="center">Alejo Paullier</h2> 

https://lkpy.lenskit.org/en/stable/knn.html

In [1]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
from hpfrec import HPF
from hyperopt import fmin, tpe, hp, STATUS_OK
from lenskit import batch, topn, util
from lenskit import crossfold as xf
from lenskit.algorithms import Recommender
from lenskit.algorithms.hpf import HPF as hpfl
from lenskit.metrics.predict import mae

# Directory holding the MovieLens 100K files. Set the ML100K_DIR environment
# variable to point somewhere else; the default is the original local path.
DATA_DIR = Path(os.environ.get('ML100K_DIR', 'C:\\Users\\Alejo\\Tesis\\Demo\\ml-100k'))

# u.data is read as tab-separated text; the column names are supplied here
# rather than taken from the file.
ratings = pd.read_csv(DATA_DIR / 'u.data', sep='\t',
                      names=['user', 'item', 'rating', 'timestamp'])


def eval(aname, algo, train, test):
    """Fit a copy of ``algo`` on ``train`` and predict the rows in ``test``.

    NOTE(review): this name shadows the built-in ``eval``. It is kept because
    ``model_trainer`` calls the function by this name.

    Parameters
    ----------
    aname : str
        Label for the algorithm; written to the ``Algorithm`` column of the
        returned predictions.
    algo
        Algorithm object to evaluate. A clone is fitted, not the object
        passed in.
    train
        Training data handed to ``fit``.
    test
        Data handed to ``batch.predict``.

    Returns
    -------
    The result of ``batch.predict`` with an extra ``Algorithm`` column
    (presumably a DataFrame that carries a ``prediction`` column, which is
    what ``model_trainer`` reads from it).
    """
    # Work on a clone so each call starts from an unfitted model.
    fittable = util.clone(algo)
    fittable = Recommender.adapt(fittable)
    fittable.fit(train)
    preds = batch.predict(fittable, test)
    # Tag every row with the algorithm name so that results from different
    # algorithms can still be told apart after they are concatenated.
    return preds.assign(Algorithm=aname)

In [5]:
def model_trainer(features):
    """Cross-validate HPF with ``features`` latent factors and return the MAE.

    Parameters
    ----------
    features : int or float
        Number of latent factors. Floats (as sampled by the optimiser) are
        truncated with ``int``.

    Returns
    -------
    The value of ``mae`` computed over the predictions of all folds pooled
    together.
    """
    features = int(features)

    hpf = hpfl(features, verbose=False)  # define algorithm

    # 5 user partitions; xf.SampleFrac(0.2) selects the held-out test rows.
    folds = xf.partition_users(ratings[['user', 'item', 'rating']], 5, xf.SampleFrac(0.2))

    # Only the predictions are needed for the metric, so the test folds are
    # not accumulated separately.
    all_recs = pd.concat(
        [eval('HPF', hpf, train, test) for train, test in folds],
        ignore_index=True,
    )

    return mae(all_recs['prediction'], all_recs['rating'])

In [9]:
# Trial log: one [features, MAE] row is appended per evaluation.
array = []

def objective(params):
    """Hyperopt objective: cross-validated MAE for the sampled ``features``.

    Parameters
    ----------
    params : dict
        Sampled point of the search space; only ``params['features']`` is read.

    Returns
    -------
    dict
        ``{'loss': <MAE>, 'status': STATUS_OK}``.

    Side effect: appends ``[features, MAE]`` to the module-level ``array``.
    """
    features = params['features']
    metric = model_trainer(features)
    array.append([features, metric])
    # fmin minimises 'loss'. MAE is an error (lower is better), so it must be
    # returned as is; negating it would steer the search towards the worst model.
    return {'loss': metric, 'status': STATUS_OK}

# Search space: a single 'features' value drawn with hp.uniform between 1 and 100.
space = {
    'features': hp.uniform('features', 1, 100),
}

# Run 100 evaluations of `objective` guided by TPE; `best` is what fmin returns.
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=100)

**********************************                                                                                     
Hierarchical Poisson Factorization                                                                                     
**********************************                                                                                     
Number of users: 943                                                                                                   
Number of items: 1672                                                                                                  
Latent factors to use: 72                                                                                              
Initializing parameters...                                                                                             
Allocating Phi matrix...                                                                                               
Initializing optimization procedure...  

                                                                                                                       

Optimization finished
Final log-likelihood: -243077                                                                                          
Final RMSE: 2.3076                                                                                                     
Minutes taken (optimization part): 0.2                                                                                 
**********************************                                                                                     
Hierarchical Poisson Factorization                                                                                     
**********************************                                                                                     
Number of users: 943                                                                                                   
Number of items: 

In [10]:
# Build the results table from the trial log. Passing the column names to the
# constructor also works when `array` is empty, whereas assigning `.columns`
# afterwards would fail with a length mismatch on a zero-column frame.
metric = pd.DataFrame(array, columns=['features', 'MAE'])
metric.head(5)

Unnamed: 0,features,reg,RMSE
0,10.312233,0.052351,0.914337
1,10.449474,0.106525,0.91194
2,69.224375,0.18455,0.910361
3,82.33336,0.238358,0.913553
4,70.525621,0.25824,0.906652


In [11]:
# Ten trials with the lowest MAE (sort_values sorts ascending by default).
metric.sort_values('MAE').head(10)

Unnamed: 0,features,reg,RMSE
47,67.969018,0.159718,0.900337
74,61.00737,0.156938,0.902556
70,82.091355,0.169876,0.902591
52,83.795977,0.04535,0.903057
37,88.37236,0.250508,0.903183
82,72.217708,0.124464,0.904008
93,70.098977,0.057532,0.904184
67,96.963554,0.285707,0.904514
40,87.92582,0.207074,0.904645
42,99.191023,0.213207,0.904821


In [12]:
# Save the results table to 'metric_MAE.csv' (relative to the working
# directory) without writing the index column.
metric.to_csv('metric_MAE.csv', index=False)