<img src="https://www3.um.edu.uy/logoum.jpg" width=300>
<h1 align="center">Thesis - FunkSVD nDCG</h1> 
<h2 align="center">Alejo Paullier</h2> 

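Reference — LensKit documentation (note: this link opens the k-NN page; the FunkSVD algorithm used in this notebook is covered in the matrix-factorization section of the same documentation): https://lkpy.lenskit.org/en/stable/knn.html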

In [1]:
from lenskit import batch, topn, util
from lenskit import crossfold as xf
from lenskit.algorithms import Recommender, funksvd
from lenskit.algorithms.funksvd import FunkSVD
from lenskit import topn
import pandas as pd
import numpy as np

# Read the tab-separated ratings file (ml-100k `u.data`); the column names are
# supplied explicitly through `names`.
RATINGS_FILE = 'C:\\Users\\Alejo\\Tesis\\Demo\\ml-100k\\u.data'
RATING_COLUMNS = ['user', 'item', 'rating', 'timestamp']
ratings = pd.read_csv(RATINGS_FILE, sep='\t', names=RATING_COLUMNS)


def eval(aname, algo, train, test, n=100):
    """Fit a copy of ``algo`` on ``train`` and recommend for every user in ``test``.

    NOTE(review): this name shadows Python's built-in ``eval``. It is kept
    because other cells in this notebook call the function by this name.

    Parameters
    ----------
    aname : str
        Label stored in the ``Algorithm`` column of the returned frame.
    algo
        Algorithm to evaluate. It is cloned before fitting, so the training
        happens on the copy rather than on the object passed in.
    train
        Training ratings handed to ``fit``.
    test
        Test ratings; only the ``user`` column is read here.
    n : int, optional
        Number of recommendations requested per user. Defaults to 100, the
        value that used to be hard-coded.

    Returns
    -------
    The result of ``batch.recommend`` with an added ``Algorithm`` column.
    """
    # Clone first so every call trains its own copy of the algorithm.
    fittable = util.clone(algo)
    # Adapt the copy to the Recommender interface used by batch.recommend.
    fittable = Recommender.adapt(fittable)
    # Train on the training split.
    fittable.fit(train)
    # Distinct users that appear in the test split.
    users = test.user.unique()
    # Produce up to `n` recommendations for each of those users.
    recs = batch.recommend(fittable, users, n)
    # Tag the rows with the algorithm name so results can be told apart later.
    recs['Algorithm'] = aname
    return recs

In [4]:
def model_trainer(features,lrate,reg):
    """Score one FunkSVD hyper-parameter setting and return its mean nDCG.

    ``features`` is converted with ``int()`` before use, so fractional values
    are accepted. ``lrate`` and ``reg`` are passed to ``FunkSVD`` unchanged.
    The global ``ratings`` frame is split with
    ``xf.partition_users(..., 5, xf.SampleFrac(0.2))``; for each split the
    model is trained and recommendations are generated through ``eval``.
    The returned value is the mean of the ``ndcg`` column computed by
    ``topn.RecListAnalysis`` over all splits together.
    """
    n_features = int(features)
    # One algorithm template; `eval` clones it for every split.
    algo = FunkSVD(
        features=n_features,
        iterations=100,
        lrate=lrate,
        reg=reg,
        damping=5,
        range=None,
        bias=True,
    )

    fold_recs = []
    fold_truth = []
    rating_triples = ratings[['user', 'item', 'rating']]
    for train_part, test_part in xf.partition_users(rating_triples, 5, xf.SampleFrac(0.2)):
        fold_truth.append(test_part)  # keep the held-out rows for scoring
        fold_recs.append(eval('FunkSVD', algo, train_part, test_part))

    recs_frame = pd.concat(fold_recs, ignore_index=True)
    truth_frame = pd.concat(fold_truth, ignore_index=True)

    analysis = topn.RecListAnalysis()
    analysis.add_metric(topn.ndcg)
    per_list = analysis.compute(recs_frame, truth_frame)

    return per_list["ndcg"].mean()

In [5]:
from hyperopt import fmin, tpe, hp, STATUS_OK

def objective(params):
    """Hyperopt objective: evaluate one sampled configuration.

    Reads ``features``, ``lrate`` and ``reg`` from ``params``, scores them with
    ``model_trainer``, prints the configuration together with its score, and
    returns the negated score as the loss (hyperopt minimises, while the
    score is to be maximised).
    """
    features, lrate, reg = params['features'], params['lrate'], params['reg']
    score = model_trainer(features, lrate, reg)
    # Printed as a list followed by a comma, one trial per call.
    print([features, lrate, reg, score], ',')
    return dict(loss=-score, status=STATUS_OK)

# Search ranges for the three tuned hyper-parameters. `features` is sampled as
# a float here; `model_trainer` converts it with int() before use.
space = dict(
    features=hp.uniform('features', 1, 100),
    lrate=hp.uniform('lrate', 0.0005, 0.005),
    reg=hp.uniform('reg', 0.005, 0.05),
)

# Run 100 evaluations of `objective` with the TPE suggestion algorithm.
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=100)

[4.226608873812975, 0.0045897130586514834, 0.027921548719885822, 0.13734923369229282]                                  
,                                                                                                                      
[45.198835280784486, 0.004488409236653506, 0.016684050085984743, 0.11486893175630614]                                  
,                                                                                                                      
[33.72114640702098, 0.002041859193444821, 0.03929804707076872, 0.10477126306641475]                                    
,                                                                                                                      
[50.20292969606549, 0.0008730668379908636, 0.03301900485188898, 0.08352456530819795]                                   
,                                                                                                                      
[45.58896635866637, 0.002968434144266861

[4.746697816595039, 0.002823782505875766, 0.04262947757328587, 0.14405206487259153]                                    
,                                                                                                                      
[27.143496661443777, 0.0020223251706994283, 0.0456213134051045, 0.1055772557954724]                                    
,                                                                                                                      
[14.692054597359167, 0.0022940456600275467, 0.04987531775511236, 0.13068032103997093]                                  
,                                                                                                                      
[11.11868392037577, 0.0037238364496778057, 0.039986050897346595, 0.1373286071873354]                                   
,                                                                                                                      
[4.402475343007088, 0.002378559827577524

In [7]:
# Build the results table and name its four columns.
# NOTE(review): `array` is not defined in any visible cell, so this cell fails
# on a fresh kernel. Presumably it holds the [features, lrate, reg, nDCG] rows
# printed by `objective` — confirm and define it (or load it) explicitly.
metric = pd.DataFrame(array)
metric.columns = ['features','learning rate','reg','nDCG']
# Show the first five rows.
metric.head(5)

Unnamed: 0,features,learning rate,reg,nDCG
0,4.226609,0.00459,0.027922,0.137349
1,45.198835,0.004488,0.016684,0.114869
2,33.721146,0.002042,0.039298,0.104771
3,50.20293,0.000873,0.033019,0.083525
4,45.588966,0.002968,0.019837,0.121636


In [12]:
# Ten configurations with the highest nDCG, best first.
ranked = metric.sort_values(by='nDCG', ascending=False)
ranked.iloc[:10]

Unnamed: 0,features,learning rate,reg,nDCG
63,3.8439,0.002552,0.047335,0.147466
85,4.228417,0.002272,0.039216,0.147194
72,4.402475,0.002379,0.034582,0.147043
57,4.376752,0.002533,0.042355,0.146957
53,3.643409,0.003098,0.032038,0.145998
22,1.812813,0.004871,0.031042,0.145441
27,1.893767,0.003178,0.005527,0.144529
68,4.746698,0.002824,0.042629,0.144052
29,1.427377,0.00211,0.039016,0.143807
96,3.343649,0.001901,0.036711,0.143033


In [3]:
# Reload the tuning results that were saved to CSV and display them.
METRIC_CSV = 'C:\\Users\\Alejo\\Tesis\\Algoritmos\\FunkSVD\\results\\metric_nDCG.csv'
metric = pd.read_csv(METRIC_CSV)
metric

Unnamed: 0,features,learning rate,reg,nDCG
0,4.226609,0.004590,0.027922,0.137349
1,45.198835,0.004488,0.016684,0.114869
2,33.721146,0.002042,0.039298,0.104771
3,50.202930,0.000873,0.033019,0.083525
4,45.588966,0.002968,0.019837,0.121636
...,...,...,...,...
95,11.107693,0.002289,0.025000,0.136107
96,3.343649,0.001901,0.036711,0.143033
97,31.924690,0.002799,0.033445,0.116036
98,98.997384,0.003625,0.040932,0.076007


In [4]:
%matplotlib qt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
colormap = cm.hot
x = metric['reg']
y = metric['learning rate']
z = metric['features'] 
c = metric['nDCG']


fig = plt.figure()

ax = fig.add_subplot(111, projection='3d')
img = ax.scatter(x, y, z, c=c, cmap=plt.hsv())
ax.set_xlabel('reg')
ax.set_ylabel('learning rate')
ax.set_zlabel('features')
fig.colorbar(img)
plt.show()