<img src="https://www3.um.edu.uy/logoum.jpg" width=300>
<h1 align="center">Thesis - IMF nDCG</h1> 
<h2 align="center">Alejo Paullier</h2> 

https://lkpy.lenskit.org/en/stable/knn.html

In [1]:
from lenskit import batch, topn, util
from lenskit import crossfold as xf
from lenskit.algorithms import Recommender, als
from lenskit.algorithms.als import ImplicitMF
from lenskit import topn
import pandas as pd
import numpy as np

ratings = pd.read_csv('C:\\Users\\Alejo\\Tesis\\Demo\\ml-100k\\u.data', sep='\t',
                      names=['user', 'item', 'rating', 'timestamp'])

def eval(aname, algo, train, test):
    fittable = util.clone(algo) # Object cloning means to create an exact copy of the original object.
    fittable = Recommender.adapt(fittable) 
    fittable.fit(train) # Entrenamos el algoritmo con el training dataset
    users = test.user.unique() # Devuelve un array con los users unicos en el testing dataset
    # now we run the recommender
    recs = batch.recommend(fittable, users, 100)
    # add the algorithm name for analyzability
    recs['Algorithm'] = aname
    return recs

In [2]:
def model_trainer(features,reg,weight):
    all_recs = []
    test_data = []
    
    features=int(features)
    imf = ImplicitMF(features=features, iterations=100, reg=reg, weight=weight) # define algorithm# define algorithm
    
    for train, test in xf.partition_users(ratings[['user', 'item', 'rating']], 5, xf.SampleFrac(0.2)):
        test_data.append(test) # save testing data
        all_recs.append(eval('IMF', imf, train, test))
    
    all_recs = pd.concat(all_recs, ignore_index=True)
    test_data = pd.concat(test_data, ignore_index=True)
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(all_recs, test_data)
                          
    return results["ndcg"].mean()

In [3]:
from hyperopt import fmin, tpe, hp, STATUS_OK
array=[]
def objective(params):
    features = params['features']
    reg = params['reg']
    weight=params['weight']
    metric = model_trainer(features,reg,weight)
    array.append([features,reg,weight,metric])
    return {'loss': -metric, 'status': STATUS_OK}

space={'features': hp.uniform('features', 1, 100),
       'reg': hp.uniform('reg',0.01,0.4),
      'weight': hp.uniform('weight',0,100)}

best = fmin(objective, space, algo=tpe.suggest,max_evals=100)

100%|████████████████████████████████████████████████| 100/100 [41:57<00:00, 19.73s/it, best loss: -0.5002891539107306]


In [4]:
metric = pd.DataFrame(array)
metric.columns = ['features','reg','weight','nDCG']
metric.head(5)

Unnamed: 0,features,reg,weight,nDCG
0,83.511311,0.304108,99.320598,0.349682
1,55.654914,0.363653,9.208328,0.395828
2,85.81239,0.395029,45.663459,0.360242
3,76.863609,0.214006,12.478002,0.384014
4,54.504722,0.19331,82.742023,0.331181


In [16]:
metric.sort_values(by=['nDCG'],ascending=False).head(10)

Unnamed: 0,features,reg,weight,nDCG
72,16.150242,0.177772,0.156574,0.500289
94,32.492423,0.092546,0.129485,0.498131
69,12.547631,0.096099,0.033139,0.494644
66,29.53446,0.062991,1.26948,0.48905
25,11.46402,0.011059,1.177737,0.48747
24,12.4011,0.075462,1.286835,0.486571
68,30.381959,0.060914,1.484234,0.480769
79,8.181224,0.179737,0.349582,0.474686
75,20.713105,0.200089,4.129481,0.458794
55,27.452131,0.125313,3.884423,0.452768


In [6]:
metric.to_csv('metric_nDCG.csv', index=False)

In [2]:
metric = pd.read_csv('C:\\Users\\Alejo\\Tesis\\Algoritmos\\Implicit Matrix Factorization\\results\\metric_nDCG.csv')
metric

Unnamed: 0,features,reg,weight,nDCG
0,83.511311,0.304108,99.320598,0.349682
1,55.654914,0.363653,9.208328,0.395828
2,85.812390,0.395029,45.663459,0.360242
3,76.863609,0.214006,12.478002,0.384014
4,54.504722,0.193310,82.742023,0.331181
...,...,...,...,...
95,21.921606,0.192380,5.590023,0.442474
96,33.093965,0.172566,60.151417,0.305655
97,37.961846,0.223252,83.672449,0.299510
98,17.994607,0.090044,14.309838,0.384214


In [3]:
%matplotlib qt
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
colormap = cm.hot
x = metric['reg']
y = metric['features'] 
z = metric['weight']
c = metric['nDCG']

fig = plt.figure()

ax = fig.add_subplot(111, projection='3d')
img = ax.scatter(x, y, z, c=c, cmap=plt.jet())
ax.set_xlabel('reg')
ax.set_ylabel('features')
ax.set_zlabel('weight')
fig.colorbar(img)
plt.show()