<img src="https://www3.um.edu.uy/logoum.jpg" width=300>
<h1 align="center">Thesis - Popular F1</h1> 
<h2 align="center">Alejo Paullier</h2> 

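Reference: LensKit documentation (k-NN algorithms page; note that this notebook itself evaluates the `Popular` recommender from `lenskit.algorithms.basic`): https://lkpy.lenskit.org/en/stable/knn.html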

In [1]:
from lenskit import batch, topn, util
from lenskit import crossfold as xf
from lenskit.algorithms import Recommender
from lenskit.algorithms.basic import Popular
from lenskit.metrics.predict import rmse
import pandas as pd
import numpy as np

# Load the ratings file into a frame with explicit column names.
# The multi-character '::' separator requires the slower pure-Python parser engine.
# NOTE(review): the path suggests the MovieLens 1M dataset -- confirm; the absolute
# local path also means the notebook only runs on the author's machine.
ratings = pd.read_csv(
    'D:\\Escritorio\\UM\\Tesis\\ML 1M\\ratings.dat',
    names=['user', 'item', 'rating', 'timestamp'],
    sep='::',
    engine='python',
)


def eval(aname, algo, train, test, n_recs=100):
    """Fit a copy of ``algo`` on ``train`` and recommend for every user in ``test``.

    NOTE: the function name shadows Python's built-in ``eval``; it is kept
    unchanged because other cells call it by this name.

    Parameters
    ----------
    aname : str
        Label written to the ``Algorithm`` column of the result.
    algo
        Algorithm template. It is cloned with ``util.clone`` and the clone is
        wrapped with ``Recommender.adapt`` before fitting.
    train
        Training data handed to the clone's ``fit`` method.
    test
        Held-out data; only its ``user`` column is read here, to collect the
        distinct users that receive recommendations.
    n_recs : int, optional
        Number of recommendations requested per user. Defaults to 100, the
        value that was previously hard-coded.

    Returns
    -------
    The object returned by ``batch.recommend`` with an extra ``Algorithm``
    column holding ``aname``.
    """
    # Work on a clone so fitting does not touch the object the caller passed in.
    fittable = Recommender.adapt(util.clone(algo))
    # Train the algorithm on the training dataset.
    fittable.fit(train)
    # Distinct users present in the test dataset.
    users = test.user.unique()
    # Produce the recommendation lists for those users.
    recs = batch.recommend(fittable, users, n_recs)
    # Tag the rows with the algorithm name so results can be grouped later.
    recs['Algorithm'] = aname
    return recs

In [2]:
def model_trainer(n_partitions=5, test_frac=0.2):
    """Run one user-partitioned evaluation of ``Popular`` and summarise it.

    The ratings are split with ``xf.partition_users``; for every split the
    recommender is trained and queried through ``eval``. Recommendations and
    test rows from all splits are concatenated and scored once.

    Parameters
    ----------
    n_partitions : int, optional
        Number of user partitions passed to ``xf.partition_users`` (default 5).
    test_frac : float, optional
        Fraction passed to ``xf.SampleFrac`` for the per-user test sample
        (default 0.2).

    Returns
    -------
    list
        ``[mean F1, mean precision, mean recall]`` over the rows returned by
        the analysis. F1 is taken as 0 for rows where precision + recall is
        zero or not a number.
    """
    pop = Popular(selector=None)  # algorithm under evaluation
    all_recs = []
    test_data = []

    splits = xf.partition_users(ratings[['user', 'item', 'rating']],
                                n_partitions, xf.SampleFrac(test_frac))
    for train, test in splits:
        test_data.append(test)  # keep the held-out rows for scoring
        all_recs.append(eval('Popular', pop, train, test))
    all_recs = pd.concat(all_recs, ignore_index=True)
    test_data = pd.concat(test_data, ignore_index=True)

    # Score both metrics in a single analysis: one pass over the data, and
    # precision/recall are guaranteed to sit on the same rows.
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.precision)
    rla.add_metric(topn.recall)
    scores = rla.compute(all_recs, test_data)

    precision = np.asarray(scores['precision'].values, dtype=float)
    recall = np.asarray(scores['recall'].values, dtype=float)
    denom = precision + recall

    # Harmonic mean, leaving 0 wherever the denominator is 0 or NaN instead of
    # dividing first and patching NaNs afterwards.
    f1 = np.zeros_like(denom)
    with np.errstate(invalid='ignore'):
        valid = denom > 0
    np.divide(2.0 * precision * recall, denom, out=f1, where=valid)

    return [f1.mean(), scores['precision'].mean(), scores['recall'].mean()]

In [3]:
%%time
# Repeat the full evaluation and collect each run's [F1, precision, recall] summary.
# NOTE(review): range(1, 100) yields 99 iterations, not 100 -- confirm the intended count.
array = []
for i in range(1, 100):
    array += [model_trainer()]



Wall time: 1h 1min 39s


In [4]:
# One row per evaluation run; columns follow the order returned by model_trainer.
metric = pd.DataFrame(array, columns=['F1', 'Precision', 'Recall'])
metric.head()

Unnamed: 0,F1,Precision,Recall
0,0.113434,0.088245,0.29561
1,0.112927,0.087843,0.296438
2,0.112805,0.087788,0.295686
3,0.112772,0.087727,0.295083
4,0.112992,0.0878,0.297408


In [5]:
# Show the ten runs with the highest mean F1.
metric.sort_values('F1', ascending=False).iloc[:10]

Unnamed: 0,F1,Precision,Recall
5,0.114005,0.088671,0.299125
31,0.113883,0.088606,0.298389
93,0.113805,0.088523,0.298386
62,0.113778,0.088427,0.299806
17,0.113764,0.088416,0.298332
71,0.11372,0.088396,0.298576
8,0.113693,0.088503,0.297286
97,0.113669,0.088346,0.300704
49,0.113652,0.088296,0.297514
23,0.11365,0.088526,0.294902


In [6]:
# Persist the per-run summary (without the row index) next to the notebook.
metric.to_csv(path_or_buf='metric_F1.csv', index=False)