In [None]:
!pip install pyreclab



In [None]:
import pyreclab
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
cd /content/drive/MyDrive/Colab Notebooks

/content/drive/MyDrive/Colab Notebooks


Basado en: @inproceedings{1706.06291v2, author = {Gabriel Sepulveda and Vicente Dominguez and Denis Parra}, title = {pyRecLab: A Software Library for Quick Prototyping of Recommender Systems}, year = {2017}, month = {August}, eprint = {arXiv:1706.06291v2}, keywords = {Recommender Systems, Software Development, Recommender Library, Python Library} }

# Base de datos

Primero se carga el conjunto de calificaciones a evaluar y se separa por género; luego cada subconjunto se divide en dos muestras: una de entrenamiento y otra de testeo.

In [None]:
# Load the user ratings, split them by gender, and split each gender subset
# into a training sample and a testing sample.
RANDOM_STATE = 42  # fixed seed so the train/test partition is reproducible

usuarios = pd.read_csv("usuarios.csv", sep=";")
# Keep only ratings of 5 or higher (the original author's stated rationale:
# to obtain a normal distribution). `.copy()` detaches the filtered frame so
# the column assignment below does not trigger SettingWithCopyWarning.
usuarios = usuarios[usuarios.rating >= 5].copy()
# Replace the original user identifiers with consecutive integer codes.
labels, levels = pd.factorize(usuarios.user)
usuarios["user"] = labels

# One frame per gender; the gender column is dropped once it has been used.
hombres = usuarios[usuarios.gender == "Male"].drop(columns=["gender"])
mujeres = usuarios[usuarios.gender == "Female"].drop(columns=["gender"])

hombres_train, hombres_test = train_test_split(
    hombres, test_size=0.2, random_state=RANDOM_STATE
)
mujeres_train, mujeres_test = train_test_split(
    mujeres, test_size=0.2, random_state=RANDOM_STATE
)

# The pyreclab cells read these files with `header = False`, so the column
# names must not be written as a first row (otherwise that row is parsed as a
# rating record).
hombres_train.to_csv('hombres_train.csv', sep=';', index=False, header=False)
hombres_test.to_csv('hombres_test.csv', sep=';', index=False, header=False)
mujeres_train.to_csv('mujeres_train.csv', sep=';', index=False, header=False)
mujeres_test.to_csv('mujeres_test.csv', sep=';', index=False, header=False)

# Algoritmos de recomendación

# Para hombres

## User Average

In [None]:
# User Average model for the men's subset, built from the ';'-delimited
# training file (columns 0, 1, 2 are passed as user, item and rating).
user_avg_hombre = pyreclab.UserAvg(
    dataset="hombres_train.csv",
    dlmchar=b';',
    header=False,
    usercol=0,
    itemcol=1,
    ratingcol=2,
)
user_avg_hombre.train(progress=False)

In [None]:
# Rating-prediction evaluation of the User Average model on the men's test file.
predlist, mae, rmse = user_avg_hombre.test(
    input_file='hombres_test.csv',
    dlmchar=b';',
    header=False,
    usercol=0,
    itemcol=1,
    ratingcol=2,
)
# Mean Absolute Error and Root Mean Squared Error
print(f"MAE: {mae}")
print(f"RMSE: {rmse}")

MAE: 0.9255193081390524
RMSE: 1.1570243658387147


In [None]:
# Recommendation evaluation (topn = 10) of the User Average model on the men's
# test file. The MAP value is bound to `maprec` instead of `map` so the builtin
# `map()` is not shadowed for the rest of the kernel session.
recommendationList, maprec, ndcg = user_avg_hombre.testrec(
    input_file='hombres_test.csv',
    dlmchar=b';',
    header=False,
    usercol=0,
    itemcol=1,
    ratingcol=2,
    topn=10,
)
# Mean Average Precision and Normalized Discounted Cumulative Gain
print(f"MAP@10: {maprec}")
print(f"NDCGg@10: {ndcg}")

MAP@10: 0.00023381737705641944
NDCGg@10: 7.846269403460716e-05


## Item Average

In [None]:
# Item Average model for the men's subset, built from the ';'-delimited
# training file (columns 0, 1, 2 are passed as user, item and rating).
item_avg_hombre = pyreclab.ItemAvg(
    dataset="hombres_train.csv",
    dlmchar=b';',
    header=False,
    usercol=0,
    itemcol=1,
    ratingcol=2,
)
item_avg_hombre.train(progress=False)

In [None]:
# Rating-prediction evaluation of the Item Average model on the men's test file.
predlist2, mae2, rmse2 = item_avg_hombre.test(
    input_file='hombres_test.csv',
    dlmchar=b';',
    header=False,
    usercol=0,
    itemcol=1,
    ratingcol=2,
)
# Mean Absolute Error and Root Mean Squared Error
print('MAE: ' + str(mae2))
print('RMSE: ' + str(rmse2))

MAE: 0.9775413331230389
RMSE: 1.2216119695603154


In [None]:
# Recommendation evaluation (topn = 10) of the Item Average model on the men's
# test file.
recommendList2, maprec2, ndcg2 = item_avg_hombre.testrec(
    input_file='hombres_test.csv',
    dlmchar=b';',
    header=False,
    usercol=0,
    itemcol=1,
    ratingcol=2,
    topn=10,
)
# Mean Average Precision and Normalized Discounted Cumulative Gain
print('MAP: %f' % maprec2)
print('nDCG: %f' % ndcg2)

MAP: 0.000246
nDCG: 0.000054


## SlopeOne

In [None]:
# SlopeOne model for the men's subset, built from the ';'-delimited training
# file (columns 0, 1, 2 are passed as user, item and rating).
slope_one_hombre = pyreclab.SlopeOne(
    dataset="hombres_train.csv",
    dlmchar=b';',
    header=False,
    usercol=0,
    itemcol=1,
    ratingcol=2,
)
slope_one_hombre.train(progress=False)

In [None]:
# Rating-prediction evaluation of the SlopeOne model on the men's test file.
predlist3, mae3, rmse3 = slope_one_hombre.test(
    input_file='hombres_test.csv',
    dlmchar=b';',
    header=False,
    usercol=0,
    itemcol=1,
    ratingcol=2,
)
# Mean Absolute Error and Root Mean Squared Error
print('MAE: ' + str(mae3))
print('RMSE: ' + str(rmse3))

MAE: 2.9651703524557953
RMSE: 3.2453181910451603


In [None]:
# Recommendation evaluation (topn = 10) of the SlopeOne model on the men's
# test file.
recommendList3, maprec3, ndcg3 = slope_one_hombre.testrec(
    input_file='hombres_test.csv',
    dlmchar=b';',
    header=False,
    usercol=0,
    itemcol=1,
    ratingcol=2,
    topn=10,
)
# Mean Average Precision and Normalized Discounted Cumulative Gain
print('MAP: %f' % maprec3)
print('nDCG: %f' % ndcg3)

MAP: 0.000027
nDCG: 0.000016


# Para mujeres

## User Average

In [None]:
# User Average model for the women's subset, built from the ';'-delimited
# training file (columns 0, 1, 2 are passed as user, item and rating).
# This cell belongs to the "User Average" section but originally instantiated
# pyreclab.ItemAvg, so the section just repeated the Item Average experiment.
user_avg_mujer = pyreclab.UserAvg(
    dataset="mujeres_train.csv",
    dlmchar=b';',
    header=False,
    usercol=0,
    itemcol=1,
    ratingcol=2,
)
user_avg_mujer.train(progress=False)
# The two evaluation cells of this section refer to the model as
# `item_avg_mujer`; keep that name bound to the User Average model so they
# evaluate the right algorithm. The "Item Average" section rebinds the name.
item_avg_mujer = user_avg_mujer

In [None]:
# Rating-prediction evaluation on the women's test file.
# NOTE(review): this cell sits in the "User Average" section but evaluates the
# object named `item_avg_mujer` -- confirm which model that name is bound to.
predlist4, mae4, rmse4 = item_avg_mujer.test(
    input_file='mujeres_test.csv',
    dlmchar=b';',
    header=False,
    usercol=0,
    itemcol=1,
    ratingcol=2,
)
# Mean Absolute Error and Root Mean Squared Error
print(f"MAE: {mae4}")
print(f"RMSE: {rmse4}")

MAE: 1.056554236979082
RMSE: 1.29881308567845


In [None]:
# Recommendation evaluation (topn = 10) on the women's test file.
# NOTE(review): this cell sits in the "User Average" section but evaluates the
# object named `item_avg_mujer` -- confirm which model that name is bound to.
recommendationList4, map4, ndcg4 = item_avg_mujer.testrec(
    input_file='mujeres_test.csv',
    dlmchar=b';',
    header=False,
    usercol=0,
    itemcol=1,
    ratingcol=2,
    topn=10,
)
# Mean Average Precision and Normalized Discounted Cumulative Gain
print(f"MAP@10: {map4}")
print(f"NDCGg@10: {ndcg4}")

MAP@10: 5.585152139203166e-05
NDCGg@10: 2.656372854349857e-05


## Item Average

In [None]:
# Item Average model for the women's subset, built from the ';'-delimited
# training file (columns 0, 1, 2 are passed as user, item and rating).
item_avg_mujer = pyreclab.ItemAvg(
    dataset="mujeres_train.csv",
    dlmchar=b';',
    header=False,
    usercol=0,
    itemcol=1,
    ratingcol=2,
)
item_avg_mujer.train(progress=False)

In [None]:
# Rating-prediction evaluation of the Item Average model on the women's test file.
predlist5, mae5, rmse5 = item_avg_mujer.test(
    input_file='mujeres_test.csv',
    dlmchar=b';',
    header=False,
    usercol=0,
    itemcol=1,
    ratingcol=2,
)
# Mean Absolute Error and Root Mean Squared Error
print('MAE: ' + str(mae5))
print('RMSE: ' + str(rmse5))

MAE: 1.056554236979082
RMSE: 1.29881308567845


In [None]:
# Recommendation evaluation (topn = 10) of the Item Average model on the
# women's test file.
recommendList5, maprec5, ndcg5 = item_avg_mujer.testrec(
    input_file='mujeres_test.csv',
    dlmchar=b';',
    header=False,
    usercol=0,
    itemcol=1,
    ratingcol=2,
    topn=10,
)
# Mean Average Precision and Normalized Discounted Cumulative Gain
print('MAP: %f' % maprec5)
print('nDCG: %f' % ndcg5)

MAP: 0.000056
nDCG: 0.000027


## SlopeOne

In [None]:
# SlopeOne model for the women's subset, built from the ';'-delimited training
# file (columns 0, 1, 2 are passed as user, item and rating).
slope_one_mujer = pyreclab.SlopeOne(
    dataset="mujeres_train.csv",
    dlmchar=b';',
    header=False,
    usercol=0,
    itemcol=1,
    ratingcol=2,
)
slope_one_mujer.train(progress=False)

In [None]:
# Rating-prediction evaluation of the SlopeOne model on the women's test file.
predlist6, mae6, rmse6 = slope_one_mujer.test(
    input_file='mujeres_test.csv',
    dlmchar=b';',
    header=False,
    usercol=0,
    itemcol=1,
    ratingcol=2,
)
# Mean Absolute Error and Root Mean Squared Error
print('MAE: ' + str(mae6))
print('RMSE: ' + str(rmse6))

MAE: 3.036010109962013
RMSE: 3.3392635638287578


In [None]:
# Recommendation evaluation (topn = 10) of the SlopeOne model on the women's
# test file.
recommendList6, maprec6, ndcg6 = slope_one_mujer.testrec(
    input_file='mujeres_test.csv',
    dlmchar=b';',
    header=False,
    usercol=0,
    itemcol=1,
    ratingcol=2,
    topn=10,
)
# Mean Average Precision and Normalized Discounted Cumulative Gain
print('MAP: %f' % maprec6)
print('nDCG: %f' % ndcg6)

MAP: 0.000025
nDCG: 0.000013
