<img src="https://www3.um.edu.uy/logoum.jpg" width=300>
<h1 align="center">Thesis - IMF F1 </h1> 
<h2 align="center">Alejo Paullier</h2> 

https://lkpy.lenskit.org/en/stable/knn.html

In [1]:
from lenskit import batch, topn, util
from lenskit import crossfold as xf
from lenskit.algorithms import Recommender, als
from lenskit.algorithms.als import ImplicitMF
import pandas as pd
import numpy as np
import os

# Silence NumPy divide-by-zero / invalid-value warnings for the whole session;
# e.g. the F1 ratio in model_trainer is 0/0 whenever precision and recall are
# both zero.
np.seterr(divide='ignore', invalid='ignore')

# Location of the ratings file (ml-100k `u.data`). Set the ML100K_RATINGS
# environment variable to run the notebook on another machine; the default is
# the original author's local path.
RATINGS_PATH = os.environ.get('ML100K_RATINGS',
                              'C:\\Users\\Alejo\\Tesis\\Demo\\ml-100k\\u.data')

# The file is tab-separated and read with explicit column names.
ratings = pd.read_csv(RATINGS_PATH, sep='\t',
                      names=['user', 'item', 'rating', 'timestamp'])


def eval(aname, algo, train, test, n_recs=100):
    """Fit a fresh copy of ``algo`` on ``train`` and recommend for the test users.

    NOTE: this name shadows Python's built-in ``eval``; it is kept because
    other cells call the function by this name.

    Parameters
    ----------
    aname : str
        Label written to the ``'Algorithm'`` column of the result.
    algo : object
        Algorithm to evaluate. It is cloned first, so ``fit`` is called on the
        copy rather than on the instance passed in.
    train : DataFrame
        Training data handed to ``fit``.
    test : DataFrame
        Held-out data; only its ``user`` column is read here.
    n_recs : int, default 100
        Number of recommendations requested per user.

    Returns
    -------
    The object returned by ``batch.recommend`` with an extra ``'Algorithm'``
    column set to ``aname``.
    """
    fittable = util.clone(algo)             # work on a copy of the algorithm
    fittable = Recommender.adapt(fittable)  # make sure it exposes recommend()
    fittable.fit(train)                     # train on the training split
    users = test['user'].unique()           # unique users in the test split
    # Produce the recommendation lists for those users.
    recs = batch.recommend(fittable, users, n_recs)
    # Tag the rows with the algorithm name so results can be told apart later.
    recs['Algorithm'] = aname
    return recs

In [2]:
def model_trainer(features, reg, weight, iterations=100, n_folds=5, test_frac=0.2):
    """Cross-validate an ImplicitMF model and return its mean F1, precision and recall.

    Parameters
    ----------
    features : number
        Number of latent features; truncated with ``int()`` because the
        hyper-parameter search samples it as a float.
    reg : float
        Passed through to ``ImplicitMF`` as ``reg``.
    weight : float
        Passed through to ``ImplicitMF`` as ``weight``.
    iterations : int, default 100
        Passed through to ``ImplicitMF`` as ``iterations``.
    n_folds : int, default 5
        Number of user partitions requested from ``xf.partition_users``.
    test_frac : float, default 0.2
        Fraction given to ``xf.SampleFrac`` for the held-out rows.

    Returns
    -------
    list
        ``[mean F1, mean precision, mean recall]`` over the rows of the
        ``RecListAnalysis`` result.
    """
    features = int(features)
    imf = ImplicitMF(features=features, iterations=iterations, reg=reg, weight=weight)

    all_recs = []
    test_data = []
    # Reads the module-level `ratings` frame; only these three columns are used.
    for train, test in xf.partition_users(ratings[['user', 'item', 'rating']],
                                          n_folds, xf.SampleFrac(test_frac)):
        test_data.append(test)  # keep the held-out rows to score against
        all_recs.append(eval('IMF', imf, train, test))

    all_recs = pd.concat(all_recs, ignore_index=True)
    test_data = pd.concat(test_data, ignore_index=True)

    # One analysis carrying both metrics: the lists are scored once and the
    # precision/recall values come from the same rows of the same frame.
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.precision)
    rla.add_metric(topn.recall)
    scores = rla.compute(all_recs, test_data)

    precision = scores['precision'].values
    recall = scores['recall'].values
    # precision + recall == 0 gives 0/0 -> NaN; silence the warning locally
    # and map NaN to an F1 of 0 below.
    with np.errstate(divide='ignore', invalid='ignore'):
        F1 = (precision * recall * 2) / (precision + recall)
    F1 = np.nan_to_num(F1)
    return [F1.mean(), scores['precision'].mean(), scores['recall'].mean()]

In [3]:
from hyperopt import fmin, tpe, hp, STATUS_OK
# Trial log: one [features, reg, weight, F1, precision, recall] row is
# appended for every evaluation of the objective.
array = []


def objective(params):
    """Evaluate one hyper-parameter sample and report its loss.

    ``params`` is a mapping with the keys ``'features'``, ``'reg'`` and
    ``'weight'``. The sample is scored with ``model_trainer``, the sample and
    its three scores are appended to the module-level ``array``, and the
    negated first score (F1) is returned as the loss together with
    ``STATUS_OK``.
    """
    hyper = [params[key] for key in ('features', 'reg', 'weight')]
    scores = model_trainer(*hyper)
    f1 = scores[0]
    array.append(hyper + [f1, scores[1], scores[2]])
    # The loss is the negated F1, so a lower loss means a higher F1.
    return {'loss': -f1, 'status': STATUS_OK}

# Search space: each hyper-parameter is drawn uniformly from its range.
# `features` is sampled as a float and truncated with int() in model_trainer.
space = dict(
    features=hp.uniform('features', 1, 100),
    reg=hp.uniform('reg', 0.01, 0.4),
    weight=hp.uniform('weight', 0, 100),
)

# Run 100 evaluations of `objective` guided by tpe.suggest; fmin's return
# value is kept in `best`.
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=100)

100%|███████████████████████████████████████████████| 100/100 [42:20<00:00, 19.35s/it, best loss: -0.20508158574767474]


In [4]:
# One row per trial, in evaluation order; the column order mirrors the rows
# appended by objective(). Passing `columns=` to the constructor also yields a
# well-formed (empty) frame when no trial has been logged, instead of raising
# on a column-count mismatch.
metric = pd.DataFrame(array,
                      columns=['features', 'reg', 'weight', 'F1', 'precision', 'recall'])
metric.head(5)

Unnamed: 0,features,reg,weight,F1,precision,recall
0,27.243368,0.242424,71.078737,0.152166,0.096373,0.613161
1,33.422175,0.244682,38.341361,0.170035,0.1093,0.649172
2,56.569336,0.374225,92.495425,0.166547,0.108929,0.610335
3,78.291784,0.17621,96.596103,0.16449,0.108346,0.592056
4,30.601178,0.079688,20.508658,0.178633,0.116098,0.663736


In [5]:
# Ten trials with the highest mean F1, best first.
metric.sort_values(by='F1', ascending=False).iloc[:10]

Unnamed: 0,features,reg,weight,F1,precision,recall
68,17.055281,0.349795,0.324471,0.205082,0.13579,0.722561
82,18.316656,0.222493,1.913022,0.20451,0.135048,0.725996
67,18.170682,0.371135,2.884363,0.202843,0.133881,0.721065
93,22.259789,0.343127,2.431647,0.202046,0.133446,0.721105
87,32.677871,0.110997,0.37987,0.201946,0.133362,0.714126
75,18.550461,0.275219,2.923379,0.200576,0.132587,0.713011
74,23.341674,0.307896,3.720886,0.198056,0.130615,0.708369
48,30.252813,0.356737,5.074903,0.194413,0.127837,0.701506
20,39.918318,0.314598,3.126051,0.193826,0.127762,0.697274
83,13.653177,0.125579,6.838862,0.193404,0.127349,0.69773


In [6]:
# Save the trial table to the working directory, without the row index.
metric.to_csv(path_or_buf='metric_F1.csv', index=False)

In [6]:
# Reload the trial table. The save cell writes 'metric_F1.csv' to the working
# directory, so read that file first; fall back to the archived copy in the
# results folder only when no fresh file exists. Reading only the archived
# path made a full re-run load results that this run did not produce.
try:
    metric = pd.read_csv('metric_F1.csv')
except FileNotFoundError:
    metric = pd.read_csv('C:\\Users\\Alejo\\Tesis\\Algoritmos\\Implicit Matrix Factorization\\results\\metric_F1.csv')
metric

Unnamed: 0,features,reg,weight,F1,precision,recall
0,27.243368,0.242424,71.078737,0.152166,0.096373,0.613161
1,33.422175,0.244682,38.341361,0.170035,0.109300,0.649172
2,56.569336,0.374225,92.495425,0.166547,0.108929,0.610335
3,78.291784,0.176210,96.596103,0.164490,0.108346,0.592056
4,30.601178,0.079688,20.508658,0.178633,0.116098,0.663736
...,...,...,...,...,...,...
95,80.624821,0.051806,58.705282,0.164541,0.108155,0.597549
96,51.532015,0.318330,15.685673,0.178507,0.116882,0.655852
97,30.826929,0.200704,21.740169,0.175543,0.113913,0.656044
98,12.902372,0.286318,99.803794,0.131756,0.082195,0.561086


In [7]:
%matplotlib qt
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
# 3-D scatter of every trial: position = (reg, features, weight), colour = F1.
# The colormap is passed explicitly. The previous `cmap=plt.jet()` handed None
# to scatter (plt.jet() returns nothing) and only produced a jet-coloured plot
# by changing matplotlib's global default colormap as a side effect, while
# `colormap` (then cm.hot) was never used.
colormap = cm.jet
x = metric['reg']
y = metric['features']
z = metric['weight']
c = metric['F1']

fig = plt.figure()

ax = fig.add_subplot(111, projection='3d')
img = ax.scatter(x, y, z, c=c, cmap=colormap)
ax.set_xlabel('reg')
ax.set_ylabel('features')
ax.set_zlabel('weight')
# Label the colour scale so the figure can be read on its own.
fig.colorbar(img, label='F1')
plt.show()