In [1]:
import pandas as pd
import json
import matplotlib.pyplot as plt
#from mlxtend.frequent_patterns import association_rules, apriori
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity, linear_kernel
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

#https://www.kaggle.com/rmisra/clothing-fit-dataset-for-size-recommendation?select=renttherunway_final_data.json

In [2]:
df_train = pd.read_csv("X_train.csv")
df_test = pd.read_csv("X_test.csv")

# Ingenieria de datos

In [3]:
#Nos quedamos con las columnas que precisamos
df_columns = df_train[["user_id","item_id","rating"]]

In [4]:
df_columns.shape

(79155, 3)

In [5]:
#Revisamos si tiene nan
print(df_columns.isna().sum())
print(df_columns.shape)


user_id    0
item_id    0
rating     0
dtype: int64
(79155, 3)


In [6]:
#Revisamos si existe ratings con decimales
df_columns.rating.value_counts()

10.0    49532
8.0     22626
6.0      5202
4.0      1375
2.0       420
Name: rating, dtype: int64

In [7]:
#Lo dejamos como int
df_columns['rating'] = df_columns['rating'].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_columns['rating'] = df_columns['rating'].astype(int)


In [8]:
df_columns.head()

Unnamed: 0,user_id,item_id,rating
0,13906,2616422,10
1,18490,152836,8
2,476969,176490,10
3,542220,1777332,8
4,334094,1547971,10


In [9]:
#Function that computes the root mean squared error (or RMSE)
def rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

In [10]:
X_train = df_columns 
X_test = df_test

# Popularity Recommender

In [11]:
#sumo los raitings por item
sum_ratings_df = pd.DataFrame(X_train.groupby(["item_id"])["rating"].sum()).reset_index()
#Los dejo descendentes por rating
sum_ratings_df = sum_ratings_df.sort_values(["rating"], ascending=False).reset_index(drop=True)
sum_ratings_df.head()

Unnamed: 0,item_id,rating
0,126335,5320
1,174086,4852
2,136110,3378
3,123793,3274
4,127865,3152


# Calculo Recall y Precision

In [12]:
X_test["rating"].value_counts()

10.0    21266
8.0      9735
6.0      2201
4.0       554
2.0       168
Name: rating, dtype: int64

In [13]:
#M quedo con los usuarios que dieron raiting alto para comparar contra mis predicciones
X_test = X_test[X_test["rating"] >= 8]
X_test["rating"].value_counts()

10.0    21266
8.0      9735
Name: rating, dtype: int64

In [14]:
#Nos quedamos con los usuarios unicos de test para predecir raitings con items
users_test_unique = X_test["user_id"].unique()
users_test_unique

array([495160, 510655, 974819, ..., 731568, 220884, 241155], dtype=int64)

In [15]:
#Nos quedamos con lo items unicos para predecir raiting
#Tomamos solo train, si un item esta solo en test es problema de cold start de items que 
item_train_unique = X_train["item_id"].unique()
item_train_unique

array([2616422,  152836,  176490, ..., 2476462, 2213342,  991459],
      dtype=int64)

In [16]:
def recomendation_popular(users_test_unique, n):
    reco_all_user = []
    for user_id in users_test_unique:
        reco_all_user.append(sum_ratings_df["item_id"].head(n).to_list())
    
    return reco_all_user
    

In [17]:
def score_metrics(reco_all_user, n=6):
    TP = 0
    FN = 0
    for i in range(len(reco_all_user)):
        user = users_test_unique[i]
        list_test_item_per_user = X_test[X_test["user_id"]==user]["item_id"].values
        reco_user = reco_all_user[i]
        for item in list_test_item_per_user:      
            if item in reco_user:
                TP += 1
            else:
                FN += 1
    precision = round((TP)/((n-1)*len(users_test_unique)),5) #menos 1 porque el n debe ser n+1. ALAN: NO ENTIENDO PORQUE SI LA LISTA IMPRIME 5 RECOMENDACIONES POR CADA USUARIO
    recall = round((TP)/(TP+FN),5)
    f1 = 2*((precision * recall)/(precision + recall))
    return precision, recall, f1

In [18]:
len(users_test_unique)

23721

In [19]:
users_test_unique[23720]

241155

In [20]:
users_test_unique[0]

495160

## Con 5 recomendaciones

In [21]:
reco_all_user = []

In [22]:
# ALAN: Habria que revisar las recomendaciones siempre mete el item 1106101 en casi todas las recomendaciones
%time
reco_all_user_5 = recomendation_popular(users_test_unique,5)
reco_all_user_5

Wall time: 0 ns


[[126335, 174086, 136110, 123793, 127865],
 [126335, 174086, 136110, 123793, 127865],
 [126335, 174086, 136110, 123793, 127865],
 [126335, 174086, 136110, 123793, 127865],
 [126335, 174086, 136110, 123793, 127865],
 [126335, 174086, 136110, 123793, 127865],
 [126335, 174086, 136110, 123793, 127865],
 [126335, 174086, 136110, 123793, 127865],
 [126335, 174086, 136110, 123793, 127865],
 [126335, 174086, 136110, 123793, 127865],
 [126335, 174086, 136110, 123793, 127865],
 [126335, 174086, 136110, 123793, 127865],
 [126335, 174086, 136110, 123793, 127865],
 [126335, 174086, 136110, 123793, 127865],
 [126335, 174086, 136110, 123793, 127865],
 [126335, 174086, 136110, 123793, 127865],
 [126335, 174086, 136110, 123793, 127865],
 [126335, 174086, 136110, 123793, 127865],
 [126335, 174086, 136110, 123793, 127865],
 [126335, 174086, 136110, 123793, 127865],
 [126335, 174086, 136110, 123793, 127865],
 [126335, 174086, 136110, 123793, 127865],
 [126335, 174086, 136110, 123793, 127865],
 [126335, 1

In [23]:
len(reco_all_user_5)

23721

In [24]:
%time
precision_5, recall_5, f1_5 = score_metrics(reco_all_user_5, n=6)

Wall time: 0 ns


In [25]:
print(precision_5)
print(recall_5)
print(f1_5)

0.008
0.03061
0.012684796684796685


## Con 10 recomendaciones

In [26]:
reco_all_user_10 = recomendation_popular(users_test_unique,10)

In [27]:
reco_all_user_10[1:3]

[[126335,
  174086,
  136110,
  123793,
  127865,
  172027,
  137585,
  132738,
  145906,
  131533],
 [126335,
  174086,
  136110,
  123793,
  127865,
  172027,
  137585,
  132738,
  145906,
  131533]]

In [28]:
precision_10, recall_10, f1_10 = score_metrics(reco_all_user_10, n=11)

In [29]:
print(precision_10)
print(recall_10)
print(f1_10)

0.00669
0.05119
0.011833486523842432


## Con 20 recomendaciones

In [30]:
reco_all_user_20 = recomendation_popular(users_test_unique,20)

In [31]:
reco_all_user_20[1:2]

[[126335,
  174086,
  136110,
  123793,
  127865,
  172027,
  137585,
  132738,
  145906,
  131533,
  166633,
  136860,
  125465,
  131117,
  124204,
  130259,
  152836,
  730008,
  1226293,
  124553]]

In [32]:
precision_20, recall_20, f1_20 = score_metrics(reco_all_user_20, n=21)

In [33]:
print(precision_20)
print(recall_20)
print(f1_20)

0.00532
0.08138
0.00998711880046136


## Con 50 recomendaciones

In [34]:
reco_all_user_50 = recomendation_popular(users_test_unique,50)

In [35]:
precision_50, recall_50, f1_50 = score_metrics(reco_all_user_50, n=51)

In [36]:
print(precision_50)
print(recall_50)
print(f1_50)

0.00354
0.13561
0.006899883578871722


# MAR@K metric

Map@k and Mar@k metrics suffer from popularity bias. If a model works well on popular items, the majority of recommendations will be correct, and Mar@k and Map@k can appear to be high while the model may not be making useful or personalized recommendations.

In [37]:
import recmetrics

In [38]:
#Estos son los verdaderos
list_test_item_per_user = []
for user in users_test_unique:
    list_test_item_per_user.append(X_test[X_test["user_id"]==user]["item_id"].values.tolist())
list_test_item_per_user[1:10]

[[154002],
 [1111981],
 [734229],
 [512791],
 [2958376],
 [124204],
 [1519172],
 [2098200, 766885, 2261828],
 [1780063]]

In [39]:
mark5 = recmetrics.mark(list_test_item_per_user, reco_all_user_5, k=5)
print(mark5)
print(recall_5)

0.01892705913921293
0.03061


In [40]:
mark10 = recmetrics.mark(list_test_item_per_user, reco_all_user_10, k=10)
print(mark10)
print(recall_10)

0.022181135573172164
0.05119


In [41]:
mark20 = recmetrics.mark(list_test_item_per_user, reco_all_user_20, k=20)
print(mark20)
print(recall_20)

0.024692932077311998
0.08138


In [42]:
mark50 = recmetrics.mark(list_test_item_per_user, reco_all_user_50, k=50)
print(mark50)
print(recall_50)

0.026719237277124983
0.13561


# Prediction coverage

Busca ver qué cantidad de ítems distintos se muestra en las predicciones. La cuenta es fácil, cantidad de ítems únicos que muestro a los usuarios finales en las predicciones dividido la cantidad de ítems únicos en train. Lo multiplico por 100 para llevarlo a porcentaje.

In [43]:
def prediction_coverage(predicted, catalog):
    """
    Computes the prediction coverage for a list of recommendations
    Parameters
    ----------
    predicted : a list of lists
        Ordered predictions
        example: [['X', 'Y', 'Z'], ['X', 'Y', 'Z']]
    catalog: list
        A list of all unique items in the training data
        example: ['A', 'B', 'C', 'X', 'Y', Z]
    Returns
    ----------
    prediction_coverage:
        The prediction coverage of the recommendations as a percent
        rounded to 2 decimal places
    ----------    
    Metric Defintion:
    Ge, M., Delgado-Battenfeld, C., & Jannach, D. (2010, September).
    Beyond accuracy: evaluating recommender systems by coverage and serendipity.
    In Proceedings of the fourth ACM conference on Recommender systems (pp. 257-260). ACM.
    """
    predicted_flattened = [p for sublist in predicted for p in sublist]
    unique_predictions = len(set(predicted_flattened))
    prediction_coverage = round(unique_predictions/(len(catalog)* 1.0)*100,2)
    return prediction_coverage

In [44]:
#Catalogos de todos los items en train.
catalog = df_train["item_id"].unique().tolist()

In [45]:
pop_coverage_5 = prediction_coverage(reco_all_user_5, catalog)
pop_coverage_10 = prediction_coverage(reco_all_user_10, catalog)
pop_coverage_20 = prediction_coverage(reco_all_user_20, catalog)
pop_coverage_50 = prediction_coverage(reco_all_user_50, catalog)

# Personalization

La idea de este indicador es que tan diferentes son las listas de recomendaciones entre distintos usuarios.

Por ejemplo:

Usuario A: item X, item Y, item Z, item T

Usuario B: item X, item Y, item Z, item V

Indicador personalizacion: 0.25

No es tan asi porque adentro tambien usa similitud del coseno que no entendi para que. Pero en pocas palabras revisa que tan diferentes son las listas recomendadas a distintos usuarios.

* A high score indicates good personalization (user's lists of recommendations are different).
* A low score indicates poor personalization (user's lists of recommendations are very similar).

Valor entre 0 y 1. Se puede pasar a porcentaje.

In [46]:
personalization_5 = recmetrics.personalization(predicted=reco_all_user_5)
personalization_10 = recmetrics.personalization(predicted=reco_all_user_10)
personalization_20 = recmetrics.personalization(predicted=reco_all_user_20)
personalization_50 = recmetrics.personalization(predicted=reco_all_user_50)

In [47]:
print(personalization_5)
print(personalization_10)
print(personalization_20)
print(personalization_50)

2.220446049250313e-16
2.220446049250313e-16
-2.220446049250313e-16
-2.220446049250313e-16


# Novelty

La novedad mide la capacidad del sistema de recomendación para proponer elementos novedosos e inesperados que es poco probable que un usuario ya conozca.

Tiene una formula muy rara que es estadistica.

Su valor va de 0 a mas infinito. El que esta mas alto es el mejor, si da bien revisar como busca esta metrica.

In [48]:
def novelty(predicted, pop, u, n):
    """
    Computes the novelty for a list of recommendations
    Parameters
    ----------
    predicted : a list of lists
        Ordered predictions
        example: [['X', 'Y', 'Z'], ['X', 'Y', 'Z']]
    pop: dictionary
        A dictionary of all items alongside of its occurrences counter in the training data
        example: {1198: 893, 1270: 876, 593: 876, 2762: 867}
    u: integer
        The number of users in the training data
    n: integer
        The length of recommended lists per user
    Returns
    ----------
    novelty:
        The novelty of the recommendations in system level
    mean_self_information:
        The novelty of the recommendations in recommended top-N list level
    ----------    
    Metric Defintion:
    Zhou, T., Kuscsik, Z., Liu, J. G., Medo, M., Wakeling, J. R., & Zhang, Y. C. (2010).
    Solving the apparent diversity-accuracy dilemma of recommender systems.
    Proceedings of the National Academy of Sciences, 107(10), 4511-4515.
    """
    mean_self_information = []
    k = 0
    for sublist in predicted:
        self_information = 0
        k += 1
        for i in sublist:
            self_information += np.sum(-np.log2(pop[i]/u))
        mean_self_information.append(self_information/n)
    novelty = sum(mean_self_information)/k
    return novelty, mean_self_information

In [49]:
nov = df_train.item_id.value_counts()
pop = dict(nov)

In [50]:
novelty_5,mean_self_info_list_5 = novelty(reco_all_user_5, pop, len(users_test_unique), 5)
novelty_10,mean_self_info_list_10 = novelty(reco_all_user_10, pop, len(users_test_unique), 10)
novelty_20,mean_self_info_list_20 = novelty(reco_all_user_20, pop, len(users_test_unique), 20)
novelty_50,mean_self_info_list_50 = novelty(reco_all_user_50, pop, len(users_test_unique), 55)

In [51]:
print(novelty_5)
print(novelty_10)
print(novelty_20)
print(novelty_50)

5.796612042047582
6.024246169790618
6.3685622089125555
6.36697003126092


# Resultados

In [52]:
resultados = {'n_recomendaciones': [5, 10, 20, 50],
              'pop_recall': [recall_5,
                            recall_10,
                            recall_20,
                            recall_50],
              'pop_precision': [precision_5,
                               precision_10,
                               precision_20,
                               precision_50],
              'pop_F1': [f1_5,
                            f1_10,
                            f1_20,
                            f1_50],             
              'MAR@K_popular': [mark5,
                            mark10,
                            mark20,
                            mark50],
               'pred_coverage_popular': [pop_coverage_5,
                            pop_coverage_10,
                            pop_coverage_20,
                            pop_coverage_50],
              
               'personalization_popular': [personalization_5,
                            personalization_10,
                            personalization_20,
                            personalization_50],
              
               'novelty_popular': [novelty_5,
                            novelty_10,
                            novelty_20,
                            novelty_50]
                    }             

In [53]:
resultados_df = pd.DataFrame.from_dict(resultados)
resultados_df

Unnamed: 0,n_recomendaciones,pop_recall,pop_precision,pop_F1,MAR@K_popular,pred_coverage_popular,personalization_popular,novelty_popular
0,5,0.03061,0.008,0.012685,0.018927,0.09,2.220446e-16,5.796612
1,10,0.05119,0.00669,0.011833,0.022181,0.18,2.220446e-16,6.024246
2,20,0.08138,0.00532,0.009987,0.024693,0.36,-2.220446e-16,6.368562
3,50,0.13561,0.00354,0.0069,0.026719,0.89,-2.220446e-16,6.36697


In [54]:
resultados_df.to_csv("resultados_popular.csv",index=False)