# 3er Entregable

Integrantes:
- Araoz, Tania
- Bajo, Pablo
- Barrera, Manuel

### Carga de librerias a utilizar 

In [386]:
import pandas as pd
from datetime import datetime
from scipy.sparse import csr_matrix
from lightfm import LightFM
import numpy as np

### Carga de datasets

In [387]:
movies = pd.read_csv("../data/ml-latest/movies.csv")
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [388]:
ratings = pd.read_csv("../data/ml-latest/ratings.csv")
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


> Se usa el dataset de ratings para trabajar, tiene las interacciones entre usuarios y películas

In [389]:
ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100836 entries, 0 to 100835
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   userId     100836 non-null  int64  
 1   movieId    100836 non-null  int64  
 2   rating     100836 non-null  float64
 3   timestamp  100836 non-null  int64  
dtypes: float64(1), int64(3)
memory usage: 3.1 MB


> El dataset contiene 100836 interacciones. <span style="color:red">ACTUALIZAR CON DATASET GRANDE</span>

> El timestamp está en formato int64, se debe convertir a formato fecha para poder trabajar.

In [390]:
ratings.isna().sum()

userId       0
movieId      0
rating       0
timestamp    0
dtype: int64

> No hay valores nulos

In [391]:
ratings['userId'].nunique()

610

> El dataset tiene 610 ususarios. <span style="color:red">Cambiar con dataset grande</span> 

In [392]:
ratings['movieId'].nunique()

9724

> el dataset contiene ratings de 9724 peliculas. <span style="color:red">Actualizar con dataset grande</span> 

In [393]:
ratings['rating'].sort_values(ascending=True).unique()

array([0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. ])

> Los valores posibles de ratings van del 0.5 al 5, con un incremento de 0.5. 

#### Preprocesado

Convertimos el timestamp numerico en formato fecha

In [394]:
ratings["timestamp"] = ratings["timestamp"].apply(lambda x: datetime.utcfromtimestamp(x).strftime('%Y/%m/%d'))

In [395]:
ratings["timestamp"]

0         2000/07/30
1         2000/07/30
2         2000/07/30
3         2000/07/30
4         2000/07/30
             ...    
100831    2017/05/03
100832    2017/05/03
100833    2017/05/08
100834    2017/05/03
100835    2017/05/03
Name: timestamp, Length: 100836, dtype: object

> Vemos que la fecha tiene un formato de fecha, pero la columna es de tipo object

Utilizando pandas convertimos a un formato de fechas que permita el filtrado

In [396]:
ratings["timestamp"] = pd.to_datetime(ratings['timestamp'], format='%Y/%m/%d')

In [397]:
ratings["timestamp"]

0        2000-07-30
1        2000-07-30
2        2000-07-30
3        2000-07-30
4        2000-07-30
            ...    
100831   2017-05-03
100832   2017-05-03
100833   2017-05-08
100834   2017-05-03
100835   2017-05-03
Name: timestamp, Length: 100836, dtype: datetime64[ns]

> Vemos que la columna tiene el formato datetime64

In [398]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,2000-07-30
1,1,3,4.0,2000-07-30
2,1,6,4.0,2000-07-30
3,1,47,5.0,2000-07-30
4,1,50,5.0,2000-07-30


Vemos el rango de fechas del dataset

In [399]:
ratings.timestamp.min()

Timestamp('1996-03-29 00:00:00')

In [400]:
ratings.timestamp.max()

Timestamp('2018-09-24 00:00:00')

> Vemos que el rango de fechas va desde el 29/03/1996 al 24/09/24

#### Dividimos dataset en train, test y validation
Vemos la catidad de ratings por año

In [401]:
plot_df = ratings.copy()
plot_df["year"] = ratings.timestamp.dt.year
plot_df = plot_df.groupby("year", as_index=False).count()[["year", "userId"]]
plot_df.columns = ["year", "reviews_count"]
plot_df.head(25)

Unnamed: 0,year,reviews_count
0,1996,6040
1,1997,1916
2,1998,507
3,1999,2439
4,2000,10061
5,2001,3922
6,2002,3478
7,2003,4014
8,2004,3279
9,2005,5813


> Tomamos una proporción 80/20 para dividir el dataset en train - test

In [402]:
train = ratings[(ratings.timestamp < datetime(year=2016, month=1, day=1))]
train.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,2000-07-30
1,1,3,4.0,2000-07-30
2,1,6,4.0,2000-07-30
3,1,47,5.0,2000-07-30
4,1,50,5.0,2000-07-30


In [403]:
train.shape

(79517, 4)

In [404]:
train.userId.nunique()

514

In [405]:
train.movieId.nunique()

7789

In [406]:
test = ratings[ratings.timestamp >= datetime(year=2016, month=1, day=1)]
test.head()

Unnamed: 0,userId,movieId,rating,timestamp
1119,10,296,1.0,2016-02-12
1120,10,356,3.5,2016-02-12
1121,10,588,4.0,2016-02-12
1122,10,597,3.5,2016-02-13
1123,10,912,4.0,2016-02-12


In [407]:
test.shape

(21319, 4)

In [408]:
test.userId.nunique()

120

In [409]:
test.movieId.nunique()

5714

In [410]:
plot_df = train.copy()
plot_df["year"] = train.timestamp.dt.year
plot_df = plot_df.groupby("year", as_index=False).count()[["year", "userId"]]
plot_df.columns = ["year", "reviews_count"]
plot_df.head(25)

Unnamed: 0,year,reviews_count
0,1996,6040
1,1997,1916
2,1998,507
3,1999,2439
4,2000,10061
5,2001,3922
6,2002,3478
7,2003,4014
8,2004,3279
9,2005,5813


In [411]:
train.shape

(79517, 4)

> Definimos el conjunto de validación, en función de nuestro conjunto de entrenamiento. <span style="color:red">Actualizar con dataset grande</span>

In [412]:
validation = train[train.timestamp >= datetime(year=2014, month=1, day=1)]
validation.head()

Unnamed: 0,userId,movieId,rating,timestamp
232,2,318,3.0,2015-10-24
233,2,333,4.0,2015-10-24
234,2,1704,4.5,2015-10-24
235,2,3578,4.0,2015-10-24
236,2,6874,4.0,2015-10-24


In [413]:
validation.shape

(8055, 4)

In [414]:
validation.userId.nunique()

69

In [415]:
validation.movieId.nunique()

2732

In [416]:
plot_df = validation.copy()
plot_df["year"] = validation.timestamp.dt.year
plot_df = plot_df.groupby("year", as_index=False).count()[["year", "userId"]]
plot_df.columns = ["year", "reviews_count"]
plot_df.head(25)

Unnamed: 0,year,reviews_count
0,2014,1439
1,2015,6616


> Redefinimos el conjunto de entrenamiento. <span style="color:red">Actualizar con dataset grande</span>

In [417]:
train = train[(train.timestamp < datetime(year=2014, month=1, day=1))]
train.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,2000-07-30
1,1,3,4.0,2000-07-30
2,1,6,4.0,2000-07-30
3,1,47,5.0,2000-07-30
4,1,50,5.0,2000-07-30


In [418]:
plot_df = train.copy()
plot_df["year"] = train.timestamp.dt.year
plot_df = plot_df.groupby("year", as_index=False).count()[["year", "userId"]]
plot_df.columns = ["year", "reviews_count"]
plot_df.head(25)

Unnamed: 0,year,reviews_count
0,1996,6040
1,1997,1916
2,1998,507
3,1999,2439
4,2000,10061
5,2001,3922
6,2002,3478
7,2003,4014
8,2004,3279
9,2005,5813


In [419]:
train.shape

(71462, 4)

¿Tenemos COLDSTAR? 

In [420]:
test[~test.userId.isin(train.userId.unique())].userId.nunique()

109

> Tenemos ### Usuarios que se encuentra en el dataset de test y no en el de train. <span style="color:red">Actualizar con dataset grande</span>

In [421]:
validation[~validation.userId.isin(train.userId.unique())].userId.nunique()

55

> Tenemos ### Usuarios que se encuentra en el dataset de validation y no en el de train. <span style="color:red">Actualizar con dataset grande</span>

#### Matriz de Interacciones

In [422]:
interactions_train = train[["userId", "movieId", "rating"]].copy()
interactions_train.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [423]:
interactions_matrix = interactions_train.pivot(index="userId", columns="movieId", values="rating")

In [424]:
interactions_matrix.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,104241,104245,104339,104841,104879,105037,105213,105504,105755,107348
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,
6,,4.0,5.0,3.0,5.0,4.0,4.0,3.0,,3.0,...,,,,,,,,,,


In [425]:
interactions_matrix = interactions_matrix.fillna(0)

In [426]:
interactions_matrix.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,104241,104245,104339,104841,104879,105037,105213,105504,105755,107348
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,4.0,5.0,3.0,5.0,4.0,4.0,3.0,0.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [427]:
interactions_matrix.shape

(459, 7122)

In [428]:
interactions_matrix_csr = csr_matrix(interactions_matrix.values)

In [429]:
interactions_matrix_csr

<459x7122 sparse matrix of type '<class 'numpy.float64'>'
	with 71462 stored elements in Compressed Sparse Row format>

In [430]:
user_ids = list(interactions_matrix.index)
user_map = {}
counter = 0
for i in user_ids:
    user_map[i] = counter
    counter += 1

In [431]:
user_map

{1: 0,
 3: 1,
 4: 2,
 5: 3,
 6: 4,
 7: 5,
 8: 6,
 9: 7,
 11: 8,
 12: 9,
 13: 10,
 14: 11,
 15: 12,
 16: 13,
 17: 14,
 19: 15,
 20: 16,
 21: 17,
 22: 18,
 23: 19,
 26: 20,
 27: 21,
 28: 22,
 29: 23,
 31: 24,
 32: 25,
 33: 26,
 34: 27,
 35: 28,
 36: 29,
 37: 30,
 38: 31,
 39: 32,
 40: 33,
 42: 34,
 43: 35,
 44: 36,
 45: 37,
 46: 38,
 48: 39,
 51: 40,
 53: 41,
 54: 42,
 55: 43,
 56: 44,
 57: 45,
 58: 46,
 59: 47,
 61: 48,
 64: 49,
 66: 50,
 68: 51,
 69: 52,
 70: 53,
 71: 54,
 72: 55,
 74: 56,
 75: 57,
 78: 58,
 79: 59,
 80: 60,
 81: 61,
 82: 62,
 83: 63,
 84: 64,
 85: 65,
 86: 66,
 87: 67,
 88: 68,
 90: 69,
 91: 70,
 92: 71,
 93: 72,
 94: 73,
 95: 74,
 96: 75,
 97: 76,
 99: 77,
 100: 78,
 101: 79,
 102: 80,
 104: 81,
 107: 82,
 108: 83,
 109: 84,
 110: 85,
 113: 86,
 115: 87,
 116: 88,
 117: 89,
 118: 90,
 120: 91,
 121: 92,
 124: 93,
 126: 94,
 127: 95,
 128: 96,
 129: 97,
 130: 98,
 131: 99,
 132: 100,
 133: 101,
 134: 102,
 135: 103,
 136: 104,
 137: 105,
 138: 106,
 140: 107,
 142: 10

#### Modelo

In [432]:
model = LightFM(no_components=50, random_state=100, learning_rate=0.03, loss='logistic')

In [433]:
%%time
model = model.fit(interactions_matrix_csr, epochs=100,num_threads=6)

CPU times: total: 26.1 s
Wall time: 45.9 s


In [434]:
model

<lightfm.lightfm.LightFM at 0x1ea7f6d0d90>

#### Metodos auxiliares
Metodo para obtener las recomendaciones en caso de ColdStart

In [435]:
def getColdStarRecomm(dataset, no_recom): 
    '''
    Esta funcion recibe el dataset y retonar una lista de tamaño no_recom con recomendaciones mas populares 
    '''
    recomm = dataset.groupby("movieId", as_index=False).agg({"userId":"nunique"}).sort_values(by="userId", ascending=False)
    recomm.columns=["movieId", "popularity"]
    return recomm.movieId.values[:no_recom]

Metodo para obtener los scores a partir del modelo

In [436]:
def predict(user):
    '''
    Esta funcion recibe un usuario y retorna lista de scores
    '''
    user_predic = user_map[user]
    n_users, n_items = interactions_matrix.shape
    item_ids = np.arange(n_items)
    return model.predict(user_ids=user_predic, item_ids = item_ids)

Metodo para obtener las recomendaciones ordenadas

In [437]:
def getOrderedMoviesId(preds, no_recom):
    '''
    Esta funcion recibe los scores predecidos y retorna lista ordenada de moviesId
    '''
    recomm = pd.Series(preds)
    recomm.index = interactions_matrix.columns
    return list(pd.Series(recomm.sort_values(ascending=False).index))[:no_recom]

Metodo para eliminar de las recomendaciones las peliculas ya vistas

In [438]:
def getNotWatchedMovieId(user, recomm):
    '''
    Esta funcion recibe las recomendaciones y retorna lista de recomendaciones sin las peliculas vistas por el usuario
    '''
    watched = train[train.userId == user].movieId.unique()
    return [x for x in recomm if x not in watched][:20]

In [439]:
def recomm(user, no_recom):
    result = None
    if user in list(interactions_matrix.index):
        score = predict(user)
        recomm = getOrderedMoviesId(score, no_recom)
        result = getNotWatchedMovieId(user, recomm)
    else:
        result = getColdStarRecomm(train, 20)
    return result

#### Recomendaciones
Generamos metricas para todos los ususarios de validation

In [440]:
def recommAll_Validation(no_recom):
    recomms_dict = {
        'user_id': [],
        'recomms': []
    }
    for user in validation.userId.unique():
        recomms_dict['user_id'].append(user)
        recomms_dict['recomms'].append(recomm(user, no_recom))
    return pd.DataFrame(recomms_dict)


In [441]:
pd.options.display.max_colwidth = None
df = recommAll_Validation(100)
df

Unnamed: 0,user_id,recomms
0,2,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]"
1,21,"[296, 356, 318, 110, 593, 480, 150, 592, 590, 380, 589, 457, 344, 780, 588, 153, 1, 527, 47, 377]"
2,29,"[110, 593, 480, 592, 590, 380, 457, 589, 344, 588, 153, 1, 527, 32, 47, 377, 349, 260, 231, 595]"
3,60,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]"
4,63,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]"
...,...,...
64,573,"[590, 344, 527, 32, 349, 161, 434, 292, 208, 454, 329, 253, 608, 858, 34, 339, 597, 300, 587, 539]"
65,581,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]"
66,582,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]"
67,598,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]"


In [442]:
df.columns

Index(['user_id', 'recomms'], dtype='object')

> Como de los ## usuarios del dataset validation, ## son usuarios nuevos, les asigna las recomendaciones coldstart <span style="color:red">Actualizar con dataset grande</span>

In [443]:
df.user_id.nunique()

69

#### Comparación

> Primero generamos la recomendaciones ideales del conjunto de validación.

In [444]:
interactions_validation = validation[["userId", "movieId", "rating"]].copy()
interactions_validation.head()

Unnamed: 0,userId,movieId,rating
232,2,318,3.0
233,2,333,4.0
234,2,1704,4.5
235,2,3578,4.0
236,2,6874,4.0


In [445]:
interactions_validation.columns

Index(['userId', 'movieId', 'rating'], dtype='object')

In [446]:
ideal_recomms = interactions_validation.sort_values(by=["userId", "rating"], ascending=False)\
                  .groupby("userId", as_index=False)\
                  .agg({"movieId": "unique"})
ideal_recomms

Unnamed: 0,userId,movieId
0,2,"[60756, 80906, 89774, 106782, 122882, 131724, 1704, 58559, 68157, 80489, 333, 3578, 6874, 46970, 48516, 74458, 79132, 86345, 112552, 8798, 91529, 99114, 115713, 318, 71535, 77455, 109487, 91658, 114060]"
1,21,"[10, 1270, 2011, 2012, 7573, 260, 356, 648, 1196, 1210, 1544, 1580, 2947, 2948, 2949, 2989, 2990, 2991, 2993, 3633, 3635, 3638, 3639, 3984, 4489, 4963, 5445, 7569, 7570, 8529, 8984, 33004, 33493, 53121, 53322, 58998, 68954, 78637, 101864, 111759, 111781, 135887, 364, 480, 588, 597, 743, 1198, 1291, 2115, 2424, 2529, 2571, 2628, 2671, 2763, 2916, 3022, 3253, 3868, 3869, 4005, 4306, 4545, 4896, 5218, 5378, 5418, 5574, 5816, 6539, 6934, 6942, 7143, 8360, 8368, 8644, 8665, 8798, 8972, 33615, 40815, 47566, 49272, 53125, 54286, 59315, 59615, 69644, 69844, 72998, 77561, 78499, 79185, 82202, 85259, 88125, 89745, 91630, 94677, ...]"
2,29,"[1408, 5464, 6502, 111362, 4223, 5010, 104841, 111759]"
3,60,"[527, 858, 58559, 318, 362, 783, 805, 1242, 2150, 2739, 3386, 3424, 6016, 48, 50, 60, 455, 832, 1203, 1562, 2067, 2724]"
4,63,"[1, 50, 260, 296, 318, 344, 745, 1080, 1136, 1148, 1196, 1198, 1208, 1220, 1223, 1270, 1288, 2078, 2716, 2788, 2858, 2959, 3949, 5669, 6104, 7361, 8874, 32587, 33779, 38038, 48774, 51255, 57669, 58559, 77455, 77800, 79702, 86290, 89753, 89904, 91529, 92494, 92535, 97913, 98491, 102217, 106696, 108932, 111781, 115617, 134853, 608, 1210, 1965, 2542, 2692, 6016, 6350, 7980, 48516, 64285, 81845, 96610, 102445, 47, 165, 357, 364, 367, 527, 588, 589, 592, 648, 778, 858, 912, 913, 953, 1036, 1084, 1193, 1201, 1202, 1203, 1213, 1221, 1262, 1282, 1291, 1617, 1968, 2115, 2329, 2791, 3088, 3435, 3481, 3535, 3751, ...]"
...,...,...
64,573,"[858, 109487, 111362, 112852]"
65,581,"[318, 527, 3147, 4896, 5816, 5995, 7361, 356, 2324, 2762, 2959, 3949, 4022, 4226, 4306, 4886, 4993, 4995, 5349, 5952, 6377, 7153, 8368, 44191, 79132, 81845, 92259, 109487, 112552, 1704, 59315, 60069, 68954, 116797, 134130, 2571, 4973, 5989, 7147, 48394]"
66,582,"[2571, 79091, 79132, 81834, 88125, 89745, 92259, 49272, 58559, 69844, 76093, 91529, 96079, 99114, 102125, 104841, 109487, 260, 1196, 48516, 54001, 60069, 68157, 74458, 81229, 81564, 87232, 91500, 91630, 94864, 109374, 134130, 4993, 5618, 44191, 48780, 68954, 73321, 115617, 84954, 97752, 97913, 76251, 77561, 85414, 96610]"
67,598,"[5816, 7361, 46578, 54001, 56367, 79132, 101577, 103543, 113829, 114265, 124851, 130490, 4963, 5418, 63082, 1197, 110771, 4226, 4306, 8360, 593]"


In [447]:
ideal_recomms.columns

Index(['userId', 'movieId'], dtype='object')

> Proximo paso sería hacer un nuevo dataframe, en donde combinariamos lo recomendado con la lista ideal para poder hacer la comparación.

In [448]:
merged_df = pd.merge(df, ideal_recomms, left_on='user_id', right_on='userId', how='inner')
merged_df = merged_df.rename(columns={'recomms': 'recomms_df', 'movieId': 'recomms_ideal'})
merged_df = merged_df.drop(columns=['userId'])

merged_df



Unnamed: 0,user_id,recomms_df,recomms_ideal
0,2,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]","[60756, 80906, 89774, 106782, 122882, 131724, 1704, 58559, 68157, 80489, 333, 3578, 6874, 46970, 48516, 74458, 79132, 86345, 112552, 8798, 91529, 99114, 115713, 318, 71535, 77455, 109487, 91658, 114060]"
1,21,"[296, 356, 318, 110, 593, 480, 150, 592, 590, 380, 589, 457, 344, 780, 588, 153, 1, 527, 47, 377]","[10, 1270, 2011, 2012, 7573, 260, 356, 648, 1196, 1210, 1544, 1580, 2947, 2948, 2949, 2989, 2990, 2991, 2993, 3633, 3635, 3638, 3639, 3984, 4489, 4963, 5445, 7569, 7570, 8529, 8984, 33004, 33493, 53121, 53322, 58998, 68954, 78637, 101864, 111759, 111781, 135887, 364, 480, 588, 597, 743, 1198, 1291, 2115, 2424, 2529, 2571, 2628, 2671, 2763, 2916, 3022, 3253, 3868, 3869, 4005, 4306, 4545, 4896, 5218, 5378, 5418, 5574, 5816, 6539, 6934, 6942, 7143, 8360, 8368, 8644, 8665, 8798, 8972, 33615, 40815, 47566, 49272, 53125, 54286, 59315, 59615, 69644, 69844, 72998, 77561, 78499, 79185, 82202, 85259, 88125, 89745, 91630, 94677, ...]"
2,29,"[110, 593, 480, 592, 590, 380, 457, 589, 344, 588, 153, 1, 527, 32, 47, 377, 349, 260, 231, 595]","[1408, 5464, 6502, 111362, 4223, 5010, 104841, 111759]"
3,60,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]","[527, 858, 58559, 318, 362, 783, 805, 1242, 2150, 2739, 3386, 3424, 6016, 48, 50, 60, 455, 832, 1203, 1562, 2067, 2724]"
4,63,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]","[1, 50, 260, 296, 318, 344, 745, 1080, 1136, 1148, 1196, 1198, 1208, 1220, 1223, 1270, 1288, 2078, 2716, 2788, 2858, 2959, 3949, 5669, 6104, 7361, 8874, 32587, 33779, 38038, 48774, 51255, 57669, 58559, 77455, 77800, 79702, 86290, 89753, 89904, 91529, 92494, 92535, 97913, 98491, 102217, 106696, 108932, 111781, 115617, 134853, 608, 1210, 1965, 2542, 2692, 6016, 6350, 7980, 48516, 64285, 81845, 96610, 102445, 47, 165, 357, 364, 367, 527, 588, 589, 592, 648, 778, 858, 912, 913, 953, 1036, 1084, 1193, 1201, 1202, 1203, 1213, 1221, 1262, 1282, 1291, 1617, 1968, 2115, 2329, 2791, 3088, 3435, 3481, 3535, 3751, ...]"
...,...,...,...
64,573,"[590, 344, 527, 32, 349, 161, 434, 292, 208, 454, 329, 253, 608, 858, 34, 339, 597, 300, 587, 539]","[858, 109487, 111362, 112852]"
65,581,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]","[318, 527, 3147, 4896, 5816, 5995, 7361, 356, 2324, 2762, 2959, 3949, 4022, 4226, 4306, 4886, 4993, 4995, 5349, 5952, 6377, 7153, 8368, 44191, 79132, 81845, 92259, 109487, 112552, 1704, 59315, 60069, 68954, 116797, 134130, 2571, 4973, 5989, 7147, 48394]"
66,582,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]","[2571, 79091, 79132, 81834, 88125, 89745, 92259, 49272, 58559, 69844, 76093, 91529, 96079, 99114, 102125, 104841, 109487, 260, 1196, 48516, 54001, 60069, 68157, 74458, 81229, 81564, 87232, 91500, 91630, 94864, 109374, 134130, 4993, 5618, 44191, 48780, 68954, 73321, 115617, 84954, 97752, 97913, 76251, 77561, 85414, 96610]"
67,598,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]","[5816, 7361, 46578, 54001, 56367, 79132, 101577, 103543, 113829, 114265, 124851, 130490, 4963, 5418, 63082, 1197, 110771, 4226, 4306, 8360, 593]"


In [449]:
aps = []

for pred, label in merged_df[["recomms_ideal", "recomms_df"]].values:
  n = len(pred)
  arange = np.arange(n, dtype=np.int32) + 1.
  rel_k = np.in1d(pred[:n], label)
  tp = np.ones(rel_k.sum(), dtype=np.int32).cumsum()
  denom = arange[rel_k]
  ap = (tp / denom).sum() / len(label)
  aps.append(ap)

In [450]:
MAP = np.mean(aps)
print(f'mean average precision = {round(MAP, 5)}')

mean average precision = 0.08557


#### Metricas en test

In [451]:
def recommAll_test(no_recom):
    recomms_dict = {
        'user_id': [],
        'recomms': []
    }
    for user in test.userId.unique():
        recomms_dict['user_id'].append(user)
        recomms_dict['recomms'].append(recomm(user, no_recom))
    return pd.DataFrame(recomms_dict)

In [452]:
pd.options.display.max_colwidth = None
df_test = recommAll_test(100)
df_test

Unnamed: 0,user_id,recomms
0,10,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]"
1,15,"[296, 356, 318, 110, 593, 480, 150, 592, 380, 590, 589, 457, 344, 780, 588, 1, 153, 527, 32, 47]"
2,18,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]"
3,21,"[296, 356, 318, 110, 593, 480, 150, 592, 590, 380, 589, 457, 344, 780, 588, 153, 1, 527, 47, 377]"
4,24,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]"
...,...,...
115,586,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]"
116,596,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]"
117,599,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]"
118,601,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]"


In [453]:
interactions_test = test[["userId", "movieId", "rating"]].copy()
interactions_test.head()

Unnamed: 0,userId,movieId,rating
1119,10,296,1.0
1120,10,356,3.5
1121,10,588,4.0
1122,10,597,3.5
1123,10,912,4.0


In [454]:
ideal_recomms2 = interactions_test.sort_values(by=["userId", "rating"], ascending=False)\
                  .groupby("userId", as_index=False)\
                  .agg({"movieId": "unique"})
ideal_recomms2

Unnamed: 0,userId,movieId
0,10,"[7458, 8533, 8869, 33794, 49272, 49286, 71579, 79091, 81845, 91529, 92259, 96079, 136020, 140110, 4306, 4447, 7169, 31685, 51705, 58559, 63992, 69406, 94070, 106696, 113275, 588, 912, 1907, 3578, 4993, 4995, 5952, 6535, 6942, 7149, 7153, 7154, 7375, 40819, 68954, 88163, 95167, 95449, 103335, 103339, 104374, 109853, 112006, 113394, 137595, 356, 597, 1784, 2671, 4246, 5377, 6377, 7293, 7451, 8529, 8636, 8665, 8969, 30749, 54286, 56367, 58047, 63113, 66203, 72330, 72720, 72737, 80549, 81847, 82167, 84374, 87222, 95543, 106489, 129428, 1088, 1247, 1307, 3882, 5066, 5620, 5943, 5957, 6155, 6266, 7151, 8808, 33145, 33679, 40629, 47099, 51662, 56949, 60397, 69844, ...]"
1,15,"[260, 318, 356, 527, 589, 1196, 1200, 1210, 1214, 1270, 2011, 3147, 3156, 3578, 4720, 4995, 5989, 33493, 84152, 122886, 152077, 166528, 1653, 2329, 2916, 48304, 48780, 84954, 104841, 111759, 112556, 134853, 296, 858, 1198, 1240, 2012, 2571, 2858, 3499, 3949, 4370, 5445, 64614, 71057, 97938, 101864, 105504, 109487, 134130, 47, 780, 1265, 2028, 3535, 4022, 4886, 4993, 5952, 6502, 7254, 8644, 56174, 60069, 68954, 70286, 79132, 85414, 103249, 158872, 160980, 166635, 293, 364, 588, 1527, 2081, 2762, 3753, 3994, 4306, 5618, 6377, 6874, 7438, 48774, 50872, 63859, 68237, 72998, 91500, 94864, 96610, 143385, 152081, 1, 2959, 8360, 8961, 71264, ...]"
2,18,"[50, 318, 923, 1201, 1203, 1209, 1221, 16, 47, 110, 235, 293, 356, 527, 589, 593, 608, 778, 904, 1080, 1136, 1148, 1193, 1206, 1207, 1210, 1212, 1213, 1219, 1222, 1223, 1227, 1234, 1247, 1356, 1374, 1732, 2324, 2542, 2571, 2762, 2951, 2959, 3052, 3275, 3578, 3681, 3949, 4011, 4226, 4993, 4995, 5008, 5120, 5995, 6300, 6440, 6539, 6807, 7147, 7153, 7254, 7843, 27716, 27878, 33794, 44191, 44199, 44665, 48516, 48780, 51540, 52604, 55118, 55290, 55765, 64197, 67255, 68073, 71108, 71899, 73017, 73323, 74458, 74510, 76251, 79132, 81788, 84392, 109487, 112334, 112552, 112852, 115713, 116797, 134130, 142488, 157108, 157110, 177593, ...]"
3,21,"[47997, 2717, 33679, 117529, 119145, 122886, 122896, 122922, 136020, 143385, 152081, 164179, 167036, 296, 541, 780, 2617, 6155, 30793, 32296, 34048, 36519, 50872, 51662, 53996, 60397, 63082, 68791, 69122, 69526, 72378, 73321, 78469, 79293, 79592, 87520, 90249, 91535, 95167, 96588, 108190, 112138, 114180, 115149, 116823, 122900, 122904, 135133, 135536, 138036, 164909, 166492, 168248, 902, 2052, 2953, 4700, 5219, 5254, 7373, 32587, 34150, 41566, 72641, 93510, 110553, 126548, 130450, 136016, 168252, 1573, 3697, 142536, 58025, 97913, 148675, 6874, 7438, 38038, 143245, 149380, 2174, 5266, 61160, 108932, 160565, 1391, 160872, 173307]"
4,24,"[6, 318, 356, 593, 1198, 1265, 3147, 5064, 6350, 27773, 50, 296, 608, 1197, 1246, 1396, 1527, 1580, 1704, 1784, 2028, 2115, 2424, 2571, 2686, 3578, 4027, 4262, 4489, 4855, 5418, 5673, 5791, 7143, 35836, 38061, 44191, 46976, 51662, 54286, 58559, 58998, 64957, 68358, 70286, 72011, 79132, 86882, 91529, 94777, 119145, 132660, 134130, 134853, 32, 165, 253, 316, 457, 552, 780, 1220, 1370, 1663, 1682, 2273, 2421, 2617, 2916, 4299, 4701, 4973, 4995, 7293, 31685, 33679, 34437, 49272, 52973, 57368, 59615, 61024, 91542, 96079, 102407, 111759, 122886, 733, 1297, 1639, 1653, 2134, 5445, 5903, 8784, 31696, 45672, 47610, 61132, 63113, ...]"
...,...,...
115,586,"[110, 318, 589, 1198, 1200, 1374, 1580, 1704, 2011, 2353, 2490, 2571, 3175, 3578, 3753, 3793, 4886, 4993, 5952, 6333, 6539, 7153, 8368, 8665, 33615, 45431, 45499, 45517, 47610, 50872, 59315, 59369, 59784, 60069, 62999, 63859, 68954, 76093, 77561, 79091, 86298, 86880, 87222, 93272, 95167, 96861, 98243, 101142, 103141, 106489, 106696, 110102, 112852, 117851, 118696, 120635, 122886, 122896, 122906, 122918, 122920, 122922, 122926, 134853, 135133, 136556, 149406, 152081, 160438, 166461, 168252, 168418, 179819, 187595, 161, 260, 380, 457, 553, 588, 1073, 1196, 1210, 1265, 1270, 1376, 1610, 3114, 5459, 27619, 33493, 41566, 42738, 52287, 54001, 54286, 54648, 58559, 65682, 78499, ...]"
116,596,"[2288, 3000, 4878, 5971, 31658, 33649, 57669, 110102, 122882, 122906, 122916, 166528, 167746, 168252, 904, 1192, 1356, 1688, 1748, 4226, 4342, 4720, 5444, 5618, 6350, 7615, 8874, 38061, 51255, 60069, 70286, 76093, 107406, 111913, 122886, 134853, 135569, 138036, 143355, 171917, 1, 34, 39, 260, 364, 527, 541, 581, 593, 595, 616, 919, 924, 1028, 1035, 1036, 1046, 1097, 1136, 1210, 1214, 1270, 1374, 1375, 1376, 1704, 1907, 2081, 2393, 2407, 2571, 2687, 2692, 2762, 2959, 3052, 3094, 3213, 3275, 3535, 3786, 3793, 3967, 3996, 4306, 4366, 4973, 4993, 5903, 6333, 6377, 6502, 6934, 8961, 27246, 27611, 33794, 34405, 37729, 39183, ...]"
117,599,"[112, 260, 293, 296, 741, 750, 924, 951, 1089, 1178, 1196, 1200, 1208, 1210, 1214, 1274, 1283, 1732, 1967, 2395, 2427, 2571, 2858, 2959, 3030, 3160, 3435, 3703, 3949, 4973, 6711, 6874, 6, 215, 541, 589, 720, 1080, 1129, 1136, 1148, 1206, 1215, 1223, 1249, 1377, 1704, 1945, 2692, 2716, 3087, 3334, 3379, 3468, 3503, 3741, 4226, 4467, 4848, 5669, 6440, 7387, 7748, 27156, 68945, 80463, 96004, 170355, 21, 47, 318, 329, 431, 480, 745, 858, 861, 912, 923, 928, 930, 1036, 1050, 1095, 1097, 1183, 1204, 1213, 1221, 1225, 1230, 1232, 1236, 1237, 1242, 1244, 1246, 1265, 1282, 1291, ...]"
118,601,"[904, 1197, 1203, 2324, 5618, 31658, 48516, 50872, 59315, 60069, 68954, 76093, 134853, 166024, 170705, 908, 953, 1704, 2355, 2762, 3578, 5971, 33794, 48780, 67255, 70286, 74458, 80463, 81834, 88810, 89745, 91529, 106782, 115617, 152081, 172591, 177765, 1, 47, 912, 1136, 1193, 1198, 1207, 1527, 1721, 1917, 3000, 4306, 6016, 44191, 48394, 49272, 54286, 63082, 68157, 72378, 72998, 74946, 78499, 80549, 81845, 95167, 99114, 112852, 122918, 168326, 170697, 174055, 176371, 1584, 3114, 46578, 122916]"


In [455]:
merged_df_test = pd.merge(df, ideal_recomms2, left_on='user_id', right_on='userId', how='inner')
merged_df_test = merged_df_test.rename(columns={'recomms': 'recomms_df', 'movieId': 'recomms_ideal'})
merged_df_test = merged_df_test.drop(columns=['userId'])

merged_df_test

Unnamed: 0,user_id,recomms_df,recomms_ideal
0,21,"[296, 356, 318, 110, 593, 480, 150, 592, 590, 380, 589, 457, 344, 780, 588, 153, 1, 527, 47, 377]","[47997, 2717, 33679, 117529, 119145, 122886, 122896, 122922, 136020, 143385, 152081, 164179, 167036, 296, 541, 780, 2617, 6155, 30793, 32296, 34048, 36519, 50872, 51662, 53996, 60397, 63082, 68791, 69122, 69526, 72378, 73321, 78469, 79293, 79592, 87520, 90249, 91535, 95167, 96588, 108190, 112138, 114180, 115149, 116823, 122900, 122904, 135133, 135536, 138036, 164909, 166492, 168248, 902, 2052, 2953, 4700, 5219, 5254, 7373, 32587, 34150, 41566, 72641, 93510, 110553, 126548, 130450, 136016, 168252, 1573, 3697, 142536, 58025, 97913, 148675, 6874, 7438, 38038, 143245, 149380, 2174, 5266, 61160, 108932, 160565, 1391, 160872, 173307]"
1,103,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]","[1206, 1222, 2997, 3160, 64614, 81932, 148626, 48516, 56782, 86882, 923, 1945, 4677, 6873, 61323, 69140, 96728, 116799, 367, 2018, 2421, 4254, 7367, 50912, 106766, 168250, 122886]"
2,105,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]","[2131, 4788, 5889, 37731, 57183, 80124, 104780, 116897, 120138, 120478, 129514, 130970, 134095, 136445, 136447, 138835, 140265, 141816, 142020, 143511, 145994, 147196, 147250, 147286, 147300, 147326, 147328, 147330, 150548, 151769, 159811, 159817, 163072, 163112, 163386, 163925, 165959, 166183, 170597, 170705, 170777, 171011, 171749, 172577, 172583, 172585, 172587, 172589, 172637, 172793, 172909, 173351, 173355, 173619, 173963, 174551, 175293, 175387, 175397, 175431, 179133, 56060, 86345, 86347, 92535, 98604, 104069, 107408, 120625, 127098, 133712, 133716, 134326, 140038, 141818, 141830, 152081, 163134, 163645, 172825, 173535, 175435, 935, 75341, 101088, 118888, 118896, 122904, 141810, 141820, 147282, 152591, 170411, 175401, 178613, 8533, 54997, 158402]"
3,112,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]","[91529, 6, 16, 253, 508, 68157, 79132, 19, 62, 104, 141, 288, 292, 454, 1356, 1393, 3717, 68954, 72998, 99114, 551, 778, 70286, 25, 208, 329, 21, 185, 2, 95, 161, 223, 339, 788, 17, 36, 39, 300, 357, 434]"
4,119,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]","[67255, 103984, 136598, 138210, 148626, 318, 7458, 56174, 89087, 112552, 122904, 131023, 135532, 141004, 142488, 157699, 162350, 165551, 168252, 48774, 50872, 108689, 110718, 110730, 114795, 117533, 129354, 134853, 139385, 139642, 166461, 7454, 8464, 8622, 51077, 64839, 136562, 140711]"
5,233,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]","[356, 177593, 2571, 3949, 58559, 50, 608, 858, 1206, 1298, 2028, 2324, 2997, 3081, 5418, 5952, 5954, 5995, 6377, 7371, 48516, 60069, 74458, 106100, 112183, 159817, 170705, 47, 364, 778, 924, 1059, 1721, 1921, 2692, 2762, 4239, 4262, 4995, 5816, 5956, 5989, 6016, 6874, 7438, 8368, 8961, 33794, 44555, 49272, 73017, 79132, 81591, 81845, 91529, 92259, 99114, 168266, 180031, 183897, 1, 110, 648, 1136, 1193, 1225, 1580, 1884, 2858, 3285, 6365, 6539, 7458, 27803, 32587, 44191, 54286, 55765, 55872, 57669, 58025, 59315, 69122, 69134, 70286, 86298, 91658, 96020, 109487, 122904, 134214, 178061, 780, 1089, 3948, 6863, 6934, 51086, 72998, 78103, ...]"
6,249,"[592, 590, 153, 165, 316, 349, 231, 161, 292, 454, 329, 185, 10, 34, 339, 597, 300, 587, 539, 39]","[96821, 139385, 164179, 166528, 168252, 187593, 1193, 7438, 122904, 122906, 122912, 122916, 122918, 122920, 122922, 122926, 128360, 137857, 142488, 143355, 148626, 152081, 152970, 158238, 158872, 160718, 166534, 166643, 171763, 175569, 176371, 179819, 185029, 541, 4896, 7254, 8914, 81834, 88125, 108729, 115149, 116823, 122890, 122924, 135133, 135569, 140715, 152077, 157296, 159093, 159755, 161131, 167746, 168254, 168266, 169984, 176101, 182715, 184253, 2717, 120799, 142422, 149352, 160080, 162478, 162606, 167370, 182823, 184015, 135567, 143367, 146210, 148888, 152091, 156609, 160954, 170813, 170875, 173941, 185435, 143365]"
7,285,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]","[858, 903, 904, 912, 1104, 1252, 1968, 1276, 949, 1304, 1945, 3296, 109374, 474, 7265, 113207, 4091, 1285]"
8,292,"[150, 527, 316, 47, 50, 648, 292, 208, 454, 329, 185, 2858, 736, 339, 300, 539, 288, 39, 410, 95]","[106487, 116823, 1278, 2302, 56775, 102903, 108190, 115617, 119145, 122900, 135133, 166528, 440, 73321, 103335, 130490, 113378, 114180, 2291, 122882, 164909, 1573, 64614, 70286, 102407, 130576, 113345, 129937, 165549, 127323, 118900]"
9,298,"[296, 356, 318, 593, 480, 110, 589, 780, 260, 150, 2571, 592, 1, 457, 380, 527, 590, 50, 47, 2858]","[6283, 1247, 1274, 2858, 50610, 122886, 741, 1220, 1476, 1968, 2115, 2335, 2772, 7099, 136449, 156371, 413, 562, 1320, 1387, 1587, 1916, 2019, 2144, 2193, 2195, 2416, 2628, 2826, 3477, 5378, 33493, 37384, 47640, 54745, 65982, 93363, 96588, 102993, 103048, 103980, 104241, 122904, 122920, 127198, 127202, 133771, 139385, 141688, 163, 208, 780, 1275, 1285, 1439, 1584, 2572, 2599, 2694, 3254, 3617, 3791, 4558, 4900, 36537, 44840, 54256, 61160, 64231, 70663, 76060, 79134, 102033, 103539, 104211, 105213, 108945, 112370, 115713, 135567, 136020, 153, 539, 1466, 1526, 1690, 1746, 1855, 5283, 5363, 5573, 7048, 34523, 47518, 56012, 56801, 58655, 68600, 71135, 84615, ...]"


In [456]:
aps = []

for pred, label in merged_df_test[["recomms_ideal","recomms_df"]].values:
  n = len(pred)
  arange = np.arange(n, dtype=np.int32) + 1.
  rel_k = np.in1d(pred[:n], label)
  tp = np.ones(rel_k.sum(), dtype=np.int32).cumsum()
  denom = arange[rel_k]
  ap = (tp / denom).sum() / len(label)
  aps.append(ap)

In [457]:
MAP = np.mean(aps)
print(f'mean average precision = {round(MAP, 5)}')

mean average precision = 0.02377
