<img src="https://www3.um.edu.uy/logoum.jpg" width=300>
<h1 align="center">Thesis - IMF MRR</h1> 
<h2 align="center">Alejo Paullier</h2> 

https://lkpy.lenskit.org/en/stable/knn.html

In [2]:
from lenskit import batch, topn, util
from lenskit import crossfold as xf
from lenskit.algorithms import Recommender, als
from lenskit.algorithms.als import ImplicitMF
from lenskit import topn
import pandas as pd
import numpy as np

# Load the ratings table from the tab-separated ml-100k `u.data` file and
# label its four columns explicitly via `names`.
ratings = pd.read_csv(
    'C:\\Users\\Alejo\\Tesis\\Demo\\ml-100k\\u.data',
    names=['user', 'item', 'rating', 'timestamp'],
    sep='\t',
)

def eval(aname, algo, train, test):
    """Fit a fresh copy of ``algo`` on ``train`` and recommend for the test users.

    Parameters
    ----------
    aname : label written to the ``Algorithm`` column of the result.
    algo : algorithm object; it is cloned first, so the caller's instance is
        not the one that gets fitted.
    train : training data passed to ``fit``.
    test : test data; only its ``user`` column is read, to decide who
        receives recommendations.

    Returns
    -------
    The frame produced by ``batch.recommend`` (called with a list size of
    100) with an extra ``Algorithm`` column set to ``aname``.
    """
    # Clone, then wrap with the Recommender adapter before training.
    model = Recommender.adapt(util.clone(algo))
    model.fit(train)

    # Each distinct user in the test split gets one recommendation list.
    test_users = test.user.unique()
    recs = batch.recommend(model, test_users, 100)

    # Tag the rows with the algorithm name for later analysis.
    recs['Algorithm'] = aname
    return recs

In [2]:
def model_trainer(features,reg,weight):
    """Cross-validate an ImplicitMF configuration and return its mean reciprocal rank.

    Parameters
    ----------
    features : number of latent features; converted with ``int()`` before use,
        so a float value is truncated.
    reg : value forwarded as ImplicitMF's ``reg`` argument.
    weight : value forwarded as ImplicitMF's ``weight`` argument.

    Returns
    -------
    Mean of the ``recip_rank`` column computed by ``topn.RecListAnalysis``
    over the recommendations of all five user partitions.

    Reads the module-level ``ratings`` frame and uses the notebook's ``eval``
    helper to fit the model and generate recommendations for each partition.
    """
    # Algorithm template; `eval` fits a clone of it for every partition.
    algo = ImplicitMF(features=int(features), iterations=100, reg=reg, weight=weight)

    fold_recs = []
    fold_truth = []
    interactions = ratings[['user', 'item', 'rating']]

    # 5 user partitions; SampleFrac(0.2) selects each test user's held-out rows.
    for train_part, test_part in xf.partition_users(interactions, 5, xf.SampleFrac(0.2)):
        fold_truth.append(test_part)  # keep the held-out rows as ground truth
        fold_recs.append(eval('IMF', algo, train_part, test_part))

    recs = pd.concat(fold_recs, ignore_index=True)
    truth = pd.concat(fold_truth, ignore_index=True)

    analysis = topn.RecListAnalysis()
    analysis.add_metric(topn.recip_rank)
    scores = analysis.compute(recs, truth)

    return scores["recip_rank"].mean()

In [3]:
from hyperopt import fmin, tpe, hp, STATUS_OK
# One [features, reg, weight, MRR] row is appended here per evaluated trial.
array = []

def objective(params):
    """Objective for hyperopt: evaluate one hyperparameter sample.

    ``params`` is a mapping with the keys ``'features'``, ``'reg'`` and
    ``'weight'``. The sample is scored with ``model_trainer``, the values and
    the resulting metric are appended to the module-level ``array`` list, and
    the negated metric is returned as the loss so that minimising the loss
    maximises the metric.
    """
    features, reg, weight = params['features'], params['reg'], params['weight']
    mrr = model_trainer(features, reg, weight)

    # NOTE(review): `features` is logged exactly as sampled, while
    # model_trainer converts it with int() before building the model.
    array.append([features, reg, weight, mrr])

    return dict(loss=-mrr, status=STATUS_OK)

# Search space: every hyperparameter is drawn with hp.uniform from the
# bounds given below.
space = {
    'features': hp.uniform('features', 1, 100),
    'reg': hp.uniform('reg', 0.01, 0.4),
    'weight': hp.uniform('weight', 0, 100),
}

# Run 100 evaluations of `objective`, with tpe.suggest proposing the samples.
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=100)

100%|██████████████████████████████████████████████| 100/100 [2:23:34<00:00, 46.77s/it, best loss: -0.6423610577197649]


In [4]:
# Turn the per-trial log into a table. Passing the labels to the constructor
# (instead of assigning `.columns` afterwards) also yields a correctly
# labelled, empty frame when no trials have been recorded yet.
metric = pd.DataFrame(array, columns=['features', 'reg', 'weight', 'MRR'])
metric.head(5)

Unnamed: 0,features,reg,weight,MRR
0,15.603384,0.018185,94.134064,0.205081
1,79.359824,0.275076,22.38399,0.443254
2,56.922012,0.056915,39.344147,0.380412
3,95.084841,0.039226,61.798909,0.455542
4,91.317623,0.265114,27.216488,0.447754


In [5]:
# Ten trials with the highest MRR, best first.
metric.sort_values('MRR', ascending=False).head(10)

Unnamed: 0,features,reg,weight,MRR
82,75.845988,0.145043,0.08773,0.642361
21,70.910635,0.124766,0.120537,0.630474
75,84.286476,0.13486,0.438481,0.626608
20,69.758446,0.124342,0.2447,0.624975
65,62.771888,0.296188,0.581682,0.623062
25,63.33951,0.221809,0.956304,0.614989
52,87.606654,0.086494,0.490825,0.605912
31,99.855584,0.110658,0.294567,0.602588
89,93.519695,0.24457,0.179039,0.6006
15,75.421895,0.3711,1.455997,0.597002


In [6]:
# Persist the trial log (without the row index) to the working directory.
metric.to_csv(path_or_buf='metric_MRR.csv', index=False)

In [3]:
# Reload the saved trial log from the results folder and display it.
metric = pd.read_csv(
    'C:\\Users\\Alejo\\Tesis\\Algoritmos\\Implicit Matrix Factorization\\results\\metric_MRR.csv'
)
metric

Unnamed: 0,features,reg,weight,MRR
0,15.603384,0.018185,94.134064,0.205081
1,79.359824,0.275076,22.383990,0.443254
2,56.922012,0.056915,39.344147,0.380412
3,95.084841,0.039226,61.798909,0.455542
4,91.317623,0.265114,27.216488,0.447754
...,...,...,...,...
95,65.064702,0.074692,2.218025,0.574311
96,59.318495,0.152874,93.901579,0.345035
97,97.352990,0.163215,40.754148,0.434504
98,54.502542,0.200396,5.754118,0.515558


In [4]:
%matplotlib qt
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
colormap = cm.hot  # not used by this figure; kept in case other cells reference it

# One point per trial: the three hyperparameters are the spatial axes and the
# trial's MRR determines the point colour.
x = metric['reg']
y = metric['features']
z = metric['weight']
c = metric['MRR']

fig = plt.figure()

ax = fig.add_subplot(111, projection='3d')
# Hand the colormap object to scatter directly. `plt.jet()` returns None and
# merely switches the global default colormap, so `cmap=plt.jet()` passed None
# and changed the colormap of every later plot as a side effect.
img = ax.scatter(x, y, z, c=c, cmap=cm.jet)
ax.set_xlabel('reg')
ax.set_ylabel('features')
ax.set_zlabel('weight')
# Label the colour scale so the figure can be read on its own.
fig.colorbar(img, label='MRR')
plt.show()