<img src="https://www3.um.edu.uy/logoum.jpg" width=300>
<h1 align="center">Thesis - Funk SVD F1 </h1> 
<h2 align="center">Alejo Paullier</h2> 

https://lkpy.lenskit.org/en/stable/knn.html

In [1]:
from lenskit import batch, topn, util
from lenskit import crossfold as xf
from lenskit.algorithms import Recommender, funksvd
from lenskit.algorithms.funksvd import FunkSVD
import pandas as pd
import numpy as np
# Silence NumPy's divide-by-zero / invalid-value warnings for the whole
# notebook: the F1 computation in model_trainer divides by
# (precision + recall), which is 0 for users with no hits.
np.seterr(invalid='ignore', divide='ignore')

# Load the ratings file as a tab-separated table with no header row;
# the four column names are supplied here.
ratings = pd.read_csv(
    'C:\\Users\\Alejo\\Tesis\\Demo\\ml-100k\\u.data',
    sep='\t',
    names=['user', 'item', 'rating', 'timestamp'],
)


def eval(aname, algo, train, test, n_recs=100):
    """Fit a clone of ``algo`` on ``train`` and recommend for the users in ``test``.

    NOTE: this function shadows Python's built-in ``eval`` inside the
    notebook; the name is kept because other cells call it.

    Parameters
    ----------
    aname
        Label stored in the ``Algorithm`` column of the result.
    algo
        Algorithm to evaluate. It is passed through ``util.clone`` first and
        the clone is what gets fitted.
    train
        Training data handed to ``fit``.
    test
        Test data; only its ``user`` column is read, to decide which users
        receive recommendations.
    n_recs
        Number of recommendations requested per user. Defaults to 100,
        the value that was previously hard-coded.

    Returns
    -------
    The object returned by ``batch.recommend`` with an extra ``Algorithm``
    column set to ``aname``.
    """
    # Clone, then wrap with Recommender.adapt before fitting.
    fittable = Recommender.adapt(util.clone(algo))
    # Train on the training split.
    fittable.fit(train)
    # Unique users present in the test split.
    users = test.user.unique()
    # Run the recommender for those users.
    recs = batch.recommend(fittable, users, n_recs)
    # Tag the rows with the algorithm name so results can be told apart.
    recs['Algorithm'] = aname
    return recs

In [2]:
def model_trainer(features,lrate,reg):
    """Cross-validate a FunkSVD configuration and return its mean top-N scores.

    Parameters
    ----------
    features
        Number of latent features; truncated to ``int`` because the caller
        may supply a float.
    lrate
        Learning rate forwarded to ``FunkSVD``.
    reg
        Regularization value forwarded to ``FunkSVD``.

    Returns
    -------
    list
        ``[mean F1, mean precision, mean recall]`` over the per-user rows of
        the recommendation-list analysis. A user whose F1 is undefined
        (precision + recall == 0) contributes 0 to the F1 mean.
    """
    features = int(features)
    # Named `algo` so the imported `funksvd` module is not shadowed locally.
    algo = FunkSVD(features=features, iterations=100, lrate=lrate, reg=reg,
                   damping=5, range=None, bias=True)

    all_recs = []
    test_data = []
    for train, test in xf.partition_users(ratings[['user', 'item', 'rating']], 5, xf.SampleFrac(0.2)):
        test_data.append(test)  # keep the held-out rows as ground truth
        all_recs.append(eval('FunkSVD', algo, train, test))

    all_recs = pd.concat(all_recs, ignore_index=True)
    test_data = pd.concat(test_data, ignore_index=True)

    # One analysis with both metrics: precision and recall land in the same
    # frame, so they are row-aligned by construction (and the lists are only
    # scanned once) instead of being paired positionally across two runs.
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.precision)
    rla.add_metric(topn.recall)
    scores = rla.compute(all_recs, test_data)

    precision = scores['precision']
    recall = scores['recall']
    p = precision.values
    r = recall.values
    # 0/0 happens when a user has neither precision nor recall; silence the
    # warning locally rather than depending on a notebook-wide np.seterr call,
    # then map the resulting NaN to 0.
    with np.errstate(divide='ignore', invalid='ignore'):
        f1 = np.nan_to_num((p * r * 2) / (p + r))
    return [f1.mean(), precision.mean(), recall.mean()]

In [3]:
from hyperopt import fmin, tpe, hp, STATUS_OK

def objective(params):
    """Hyperopt objective: score one hyper-parameter sample.

    Reads ``features``, ``lrate`` and ``reg`` from ``params``, evaluates them
    with ``model_trainer``, prints the sample together with the three scores
    it returned, and reports the negated first score as the loss (so that
    minimizing the loss maximizes that score).
    """
    features, lrate, reg = (params[key] for key in ('features', 'lrate', 'reg'))
    scores = model_trainer(features, lrate, reg)
    f1, precision, recall = scores[0], scores[1], scores[2]
    # Printed as a list followed by a comma, one line per evaluation.
    print([features, lrate, reg, f1, precision, recall], ',')
    result = {'loss': -f1, 'status': STATUS_OK}
    return result

# Search space: each hyper-parameter is drawn from a continuous uniform range.
# `features` is therefore sampled as a float; model_trainer truncates it with
# int() before building the model.
space = dict(
    features=hp.uniform('features', 1, 100),
    lrate=hp.uniform('lrate', 0.0005, 0.005),
    reg=hp.uniform('reg', 0.005, 0.05),
)

# Run 100 TPE-guided evaluations of `objective`; its loss is the negated F1,
# so the minimizer is looking for the highest F1.
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=100)

[36.53214556353606, 0.0019754474339964707, 0.022886059483245146, 0.057643884079879526, 0.03950159066808064, 0.1676575778236084]
,                                                                                                                      
[21.61512657764006, 0.0027449499213329036, 0.03949855752942255, 0.06100535500862676, 0.04204665959703084, 0.17248361311864477]
,                                                                                                                      
[18.588011808786877, 0.0010201291457008924, 0.02348935017951293, 0.062149132970608, 0.04339342523860024, 0.16994301315515423]
,                                                                                                                      
[82.18689874926794, 0.0016570101745883989, 0.04685368330032867, 0.034584199539536065, 0.023997879109225895, 0.09982088914742071]
,                                                                                                                      
[19.871403

[3.410288282595406, 0.004972544393127638, 0.005871799055743081, 0.06862596900999267, 0.0465323435843055, 0.2045532312702385]
,                                                                                                                      
[14.544443407632198, 0.004421609626086125, 0.00867173168766318, 0.059552578486395294, 0.040805938494167616, 0.17701443471325135]
,                                                                                                                      
[9.549066004121553, 0.004781247659720636, 0.03233710535266508, 0.0657795155503701, 0.04512195121951229, 0.192282072726179]
,                                                                                                                      
[20.265109608068773, 0.004233781309003526, 0.017961868060084266, 0.05821741522151371, 0.04021208907741251, 0.1652409765177073]
,                                                                                                                      
[11.027386444245

In [5]:
# Build a results table with one row per hyperopt evaluation.
# NOTE(review): `array` is not defined in any visible cell (execution count
# jumps from In [3] to In [5]); presumably it was assembled from the lines
# printed by `objective` — confirm, otherwise this cell raises NameError on a
# fresh kernel.
metric = pd.DataFrame(array)
# Column order matches the list printed by `objective`.
metric.columns = ['features','lrate','reg','F1','Precision','Recall']
metric.head(5)

Unnamed: 0,features,lrate,reg,F1,Precision,Recall
0,36.532146,0.001975,0.022886,0.057644,0.039502,0.167658
1,21.615127,0.002745,0.039499,0.061005,0.042047,0.172484
2,18.588012,0.00102,0.023489,0.062149,0.043393,0.169943
3,82.186899,0.001657,0.046854,0.034584,0.023998,0.099821
4,19.871403,0.003303,0.026437,0.062072,0.042768,0.177312


In [6]:
# Ten best evaluations, ordered by F1 from highest to lowest.
metric.sort_values('F1', ascending=False).iloc[:10]

Unnamed: 0,features,lrate,reg,F1,Precision,Recall
21,2.598613,0.003767,0.032516,0.075675,0.051315,0.23
64,1.238649,0.004511,0.008495,0.075406,0.050848,0.231697
78,1.188406,0.003019,0.035712,0.075251,0.05106,0.224416
73,1.099824,0.002733,0.036609,0.075184,0.051007,0.222287
29,6.686745,0.002706,0.028429,0.073953,0.050785,0.213013
13,2.091639,0.004349,0.039564,0.073913,0.050021,0.228648
92,5.610414,0.003338,0.035461,0.073791,0.05053,0.21579
65,1.055413,0.004578,0.026308,0.07359,0.049926,0.218647
22,1.367495,0.004651,0.031911,0.073582,0.049873,0.222841
54,1.328269,0.003854,0.031004,0.073323,0.049692,0.219363


In [7]:
# Persist the results table without the index column.
# NOTE(review): this writes to the current working directory, while the later
# read cell loads an absolute path under ...\FunkSVD\results — confirm the
# file is moved/copied there.
metric.to_csv('metric_F1.csv', index=False)

In [2]:
# Reload the saved results table from disk (absolute, machine-specific path).
# NOTE(review): the execution counter restarts at In [2] here, so this cell
# appears to come from a separate kernel session — `pd` must already be
# imported in that session.
metric = pd.read_csv('C:\\Users\\Alejo\\Tesis\\Algoritmos\\FunkSVD\\results\\metric_F1.csv')
metric

Unnamed: 0,features,lrate,reg,F1,Precision,Recall
0,36.532146,0.001975,0.022886,0.057644,0.039502,0.167658
1,21.615127,0.002745,0.039499,0.061005,0.042047,0.172484
2,18.588012,0.001020,0.023489,0.062149,0.043393,0.169943
3,82.186899,0.001657,0.046854,0.034584,0.023998,0.099821
4,19.871403,0.003303,0.026437,0.062072,0.042768,0.177312
...,...,...,...,...,...,...
95,20.539674,0.003764,0.028751,0.059299,0.040764,0.168905
96,10.982711,0.004156,0.013596,0.067201,0.046087,0.200263
97,56.063302,0.002311,0.030212,0.050223,0.034624,0.142285
98,86.419837,0.001978,0.021267,0.041770,0.028802,0.120078


In [3]:
%matplotlib qt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
# 3D scatter of every evaluated hyper-parameter sample:
# position = (reg, lrate, features), colour = F1.
colormap = cm.hot  # not used by this figure; left in place in case other cells reference it
x = metric['reg']
y = metric['lrate']
z = metric['features']
c = metric['F1']

fig = plt.figure()

ax = fig.add_subplot(111, projection='3d')
# The colormap is named explicitly. The previous `cmap=plt.hsv()` passed None
# (plt.hsv() returns nothing) and only produced HSV colours because that call
# switches matplotlib's global default colormap as a side effect.
img = ax.scatter(x, y, z, c=c, cmap='hsv')
ax.set_xlabel('reg')
ax.set_ylabel('learning rate')
ax.set_zlabel('features')
ax.set_title('FunkSVD hyper-parameter search (colour = F1)')
# Label the colour scale so the figure can be read on its own.
cbar = fig.colorbar(img)
cbar.set_label('F1')
plt.show()