In [1]:
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Display configuration: render floats with thousands separators and three
# decimals, and show up to 100 columns before pandas truncates a frame.
pd.set_option('display.float_format', '{:,.3f}'.format)
pd.set_option('display.max_columns', 100)

# Silence every warning for the remainder of the session.
# NOTE(review): this is a blanket filter; it also hides deprecation and
# convergence warnings raised by later cells.
import warnings
warnings.filterwarnings(action='ignore')




  from pandas.core import (


# Recomendación. Campaña de email marketing.

# Objetivo

- Desarrollar una estrategia de email marketing dirigida a 10,000 clientes, priorizando los productos que generen mayor beneficio.



# Aspectos a tener en cuenta

1. Modelos de Predicción:

- Crear modelos predictivos para determinar qué productos tienen mayor probabilidad de ser contratados por cada cliente.
- Considerar no solo la probabilidad de contratación, sino también el beneficio económico de cada producto.
- Utilizar técnicas de modelado avanzadas como árboles de decisión, regresión logística y redes neuronales.
- Validar y evaluar los modelos utilizando métricas como la precisión, el recall y el AUC-ROC.
2. Selección de Clientes:
- Seleccionar los 10,000 clientes que recibirán los emails basados en los resultados del modelo predictivo.
- Justificar la elección de estos clientes y la estrategia empleada.

# Enfoques para la creación de los modelos predictivos

1. Creación de un modelo para cada producto:

- En este enfoque, se crearía un modelo predictivo separado para cada producto (por ejemplo, uno para short_term_deposit, otro para loans, otro para mortgage, etc.).

- Ventajas: Es específico para cada producto. El modelo se entrenaría específicamente para un producto, lo que permite que aprenda patrones únicos relacionados con la contratación de ese producto.
- Interpretabilidad: Los factores que influyen en la contratación de cada producto pueden ser más fáciles de interpretar.
- Desventajas: Sería costoso en términos de recursos ya que requiere entrenar y mantener múltiples modelos. Además, muchos de los modelos podrían utilizar características similares, lo que significa que se estaría trabajando doble.

2. Modelo agrupado por tipos de productos:

- Se agruparían los productos similares en categorías, exactamente como lo explicaba Erin (Responsable de Marketing Directo) en uno de sus correos [cuentas, productos de ahorro e inversión (planes, fondos, etc.) y financiación (préstamos y tarjetas)].

- Ventajas: Menos modelos que mantener en comparación con la opción de un modelo por producto. Además, se pueden identificar patrones que se aplican a categorías de productos.
- Desventajas: Puede que no capture detalles específicos de un solo producto tan bien como un modelo dedicado.

3. Modelo General para Todos los Productos:

- Se utilizaría un único modelo para predecir la probabilidad de que un cliente contrate cualquier producto.
- Ventajas: Es simple, solo hay que entrenar y mantener un único modelo, por tanto menos recursos computacionales y menos tiempo de entrenamiento.
- Desventajas: Puede ser más complejo interpretar las relaciones ya que el modelo tiene que captar información de todos los productos y si un producto es raro comparado con otros, el modelo puede no predecirlo correctamente.



# Procedimiento para la creación de los modelos predictivos.

Teniendo en cuenta lo anterior, a continuación se muestra el procedimiento a desarrollar para resolver esta tarea:

- Se trabajará a partir de un Modelo General Inicial que prediga la probabilidad de contratación de productos en general.


1. Preparación de Datos

- Se carga el dataset df_full_cleaned desde S3 de AWS
  
- Se crea la variable objetivo (compra): una columna binaria que indica si el cliente ha comprado algún producto (1) o no (0), independientemente del producto que sea; es decir, si ha comprado al menos un producto vale 1 y, en caso contrario, 0.
- Se hace la partición de los datos en conjunto de entrenamiento y prueba.

2. Iteración 1: Modelos Iniciales. Para el entrenamiento se van a tener en cuenta los siguientes modelos:

- Regresión Logística
- Random Forest
- XGBoost
- K-Nearest Neighbors (KNN)
- Support Vector Machine (SVM)

- Se usará accuracy, precision, recall, F1-score para evaluar el rendimiento de cada modelo en el conjunto de prueba y se analiza el comportamiento de los modelos, así como la importancia de las características. 

3. Iteración 2: Feature Engineering

- Se crean nuevas características, generando interacciones o combinaciones de variables, teniendo en cuenta las características más relevantes en la Iteración 1.

- Entrenar y evaluar los mismos modelos que en la iteración 1, con las nuevas características.

4. Iteración 3: Refinamiento del Modelo

- Análisis de las características menos relevantes según su importancia en los modelos anteriores.

- Se vuelven a entrenar los modelos, se evalúan y se comparan los resultados.

5. Selección del modelo y ajuste de hiperparámetros

- A partir del modelo que mejor comportamiento presente se realiza un ajuste de hiperparámetros usando GridSearchCV. 
 
- Se evalúa el rendimiento final del modelo ajustado en el conjunto de prueba.

NOTA: A la hora de ajustar los hiperparámetros, lo normal sería realizar el análisis para cada uno de los modelos, ya que existe la posibilidad de que un modelo pueda mejorar con respecto a otro; pero en este caso se toma la decisión, para ahorrar recursos, de realizar el ajuste para un solo modelo: el de mejor comportamiento en la Iteración 3.



---

## 1. Preparación de datos.

In [2]:
# Load the complete cleaned dataset directly from the project's AWS S3 bucket.
df_full = pd.read_parquet(
    path="https://easy-money-project-bucket.s3.eu-west-3.amazonaws.com/df_full_cleaned.parquet"
)
df_full


Unnamed: 0,pk_cid,pk_partition,short_term_deposit,loans,mortgage,funds,securities,long_term_deposit,em_account_pp,credit_card,payroll,pension_plan,payroll_account,emc_account,debit_card,em_account_p,em_acount,num_products_contracts,p_cuenta_bancaria,cuentas_sum,p_inversion,inversion_sum,p_financiacion,financiacion_sum,profit_cuentas,profit_inversion,profit_financiacion,country_id,gender,mes_partition,mes_nombre_partition,grupo_edad,median_salary,region_code,entry_date,entry_channel,active_customer,segment,categoria_antiguedad
0,1375586,2018-01-28,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,10,0,0,ES,H,1,January,Adultos jóvenes,87218.100,Málaga,2018-01-12,Otros,1,02 - PARTICULARES,1-2 años
1,1050611,2018-01-28,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,10,0,0,ES,V,1,January,Jóvenes,35548.740,Ciudad Real,2015-08-10,KHE,0,03 - UNIVERSITARIO,Más de 3 años
2,1050612,2018-01-28,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,10,0,0,ES,V,1,January,Jóvenes,122179.110,Ciudad Real,2015-08-10,KHE,0,03 - UNIVERSITARIO,Más de 3 años
3,1050613,2018-01-28,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,40,0,ES,H,1,January,Jóvenes,119775.540,Zaragoza,2015-08-10,KHD,0,03 - UNIVERSITARIO,Más de 3 años
4,1050614,2018-01-28,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,10,0,0,ES,V,1,January,Jóvenes,101469.135,Zaragoza,2015-08-10,KHE,1,03 - UNIVERSITARIO,Más de 3 años
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5962919,1166765,2019-05-28,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,10,0,0,ES,V,5,May,Jóvenes,43912.170,Zaragoza,2016-08-14,KHE,0,03 - UNIVERSITARIO,2-3 años
5962920,1166764,2019-05-28,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,10,0,0,ES,V,5,May,Jóvenes,23334.990,"Rioja, La",2016-08-14,KHE,0,03 - UNIVERSITARIO,2-3 años
5962921,1166763,2019-05-28,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,10,0,0,ES,H,5,May,Adultos,87930.930,Zaragoza,2016-08-14,KHE,1,02 - PARTICULARES,2-3 años
5962922,1166789,2019-05-28,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,10,0,0,ES,H,5,May,Jóvenes,199592.820,Zaragoza,2016-08-14,KHE,0,03 - UNIVERSITARIO,2-3 años


In [3]:
# Group the product columns by customer (pk_cid) and take the mean of each one,
# which gives a per-customer activity level for every product.
lista_productos = [
    "pk_cid",
    "short_term_deposit",
    "loans",
    "mortgage",
    "funds",
    "securities",
    "long_term_deposit",
    "em_account_pp",
    "credit_card",
    "payroll_account",
    "emc_account",
    "debit_card",
    "em_account_p",
    "em_acount",
    "payroll",
    "pension_plan",
]

df_productos = df_full.loc[:, lista_productos].groupby("pk_cid").mean()
df_productos


Unnamed: 0_level_0,short_term_deposit,loans,mortgage,funds,securities,long_term_deposit,em_account_pp,credit_card,payroll_account,emc_account,debit_card,em_account_p,em_acount,payroll,pension_plan
pk_cid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
15891,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.500,0.000,0.000
16063,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
16203,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.833,0.000,0.000
16502,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.556,0.000,0.000,0.889,0.000,0.000
17457,0.000,0.000,0.000,0.000,0.000,0.941,0.000,0.000,0.000,0.000,0.294,0.000,1.000,0.000,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1553685,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
1553686,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
1553687,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
1553688,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000


In [4]:
# Customer attribute columns taken from the full dataset to be used for the model.
columnas_relevantes = [
    "pk_cid",
    "country_id",
    "gender",
    "grupo_edad",
    "median_salary",
    "region_code",
    "entry_date",
    "entry_channel",
    "active_customer",
    "segment",
    "categoria_antiguedad",
]

# Collapse to one row per customer by keeping the last entry of each column
# within the customer's rows.
# NOTE(review): "last" follows df_full's row order; presumably rows are ordered
# by pk_partition so this is the most recent snapshot - confirm.
df_relevantes = df_full.loc[:, columnas_relevantes].groupby("pk_cid").last()

In [10]:
# Combine the per-customer product means with the per-customer attributes,
# matching on pk_cid (pandas' default inner join).
df_fulll = pd.merge(df_productos, df_relevantes, on="pk_cid")
df_fulll

Unnamed: 0_level_0,short_term_deposit,loans,mortgage,funds,securities,long_term_deposit,em_account_pp,credit_card,payroll_account,emc_account,debit_card,em_account_p,em_acount,payroll,pension_plan,country_id,gender,grupo_edad,median_salary,region_code,entry_date,entry_channel,active_customer,segment,categoria_antiguedad
pk_cid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
15891,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.500,0.000,0.000,ES,H,Adultos mayores,117380.925,Madrid,2018-07-28,KAT,0,02 - PARTICULARES,0-3 meses
16063,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,ES,H,Adultos mayores,132384.450,Madrid,2018-11-19,KAT,0,02 - PARTICULARES,6-9 meses
16203,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.833,0.000,0.000,ES,V,Ancianos,153902.880,Barcelona,2018-12-23,KAT,1,01 - TOP,3-6 meses
16502,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.556,0.000,0.000,0.889,0.000,0.000,ES,H,Adultos mayores,132384.450,Madrid,2018-09-30,KHN,1,02 - PARTICULARES,6-9 meses
17457,0.000,0.000,0.000,0.000,0.000,0.941,0.000,0.000,0.000,0.000,0.294,0.000,1.000,0.000,0.000,ES,H,Adultos,102405.750,Madrid,2017-09-16,KAT,1,02 - PARTICULARES,1-2 años
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1553685,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,ES,V,Adultos,114633.765,Ciudad Real,2019-05-31,Otros,0,03 - UNIVERSITARIO,0-3 meses
1553686,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,ES,H,Adultos jóvenes,85787.220,Sevilla,2019-05-31,Otros,0,03 - UNIVERSITARIO,0-3 meses
1553687,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,ES,V,Jóvenes,143419.770,Madrid,2019-05-31,Otros,0,03 - UNIVERSITARIO,0-3 meses
1553688,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,ES,H,Adultos,114633.765,Cantabria,2019-05-31,Otros,0,03 - UNIVERSITARIO,0-3 meses


In [6]:
# Creación de la variable objetivo

#df_full["compra"] = df_full["num_products_contracts"].apply(lambda x: 1 if x > 0 else 0)

In [7]:
# Comportamiento de la variable objetivo

#(df_full["compra"].value_counts(normalize=True)*100).plot(kind="bar")
#for i, v in enumerate(df_full["compra"].value_counts(normalize=True)*100):
#    plt.text(i, v + 0.5, str(round(v, 2)) + "%", ha='center', va='bottom')

In [8]:
# Disabled cell: the object -> category conversions below are wrapped in a bare
# string literal, so none of them run; the cell only evaluates (and echoes) the
# string itself, and df_full keeps its original dtypes.
"""
# transformación de variables objects a categóricas

df_full["mes_nombre_partition"] = df_full["mes_nombre_partition"].astype("category")
df_full["country_id"] = df_full["country_id"].astype("category")
df_full["gender"] = df_full["gender"].astype("category")
df_full["grupo_edad"] = df_full["grupo_edad"].astype("category")
df_full["region_code"] = df_full["region_code"].astype("category")  
df_full["entry_channel"] = df_full["entry_channel"].astype("category")  """

'\n# transformación de variables objects a categóricas\n\ndf_full["mes_nombre_partition"] = df_full["mes_nombre_partition"].astype("category")\ndf_full["country_id"] = df_full["country_id"].astype("category")\ndf_full["gender"] = df_full["gender"].astype("category")\ndf_full["grupo_edad"] = df_full["grupo_edad"].astype("category")\ndf_full["region_code"] = df_full["region_code"].astype("category")  \ndf_full["entry_channel"] = df_full["entry_channel"].astype("category")  '

In [9]:
# Disabled cell: the whole modelling pipeline below (metric helpers, train/valid/test
# split, preprocessing, model loop, decision tree, confusion matrix) sits inside a bare
# string literal, so nothing in it is defined or executed; the cell only echoes the string.
# NOTE(review): as written it reads a `compra` column from df_full, but the cell that
# would create that column is commented out - re-enable that first before reviving this.
# NOTE(review): the DecisionTreeClassifier step fits on X_train directly instead of going
# through `preprocessor`; presumably that fails if any non-numeric column is selected - confirm.
"""

# Función para calcular métricas y agregar a las tablas
def metricas(y_test, y_pred, y_test_score, y_train, y_train_pred, y_train_score, name, tabla_metricas):
    # Test metrics
    test_accuracy = accuracy_score(y_test, y_pred)
    test_precision = precision_score(y_test, y_pred)
    test_recall = recall_score(y_test, y_pred)
    test_f1 = f1_score(y_test, y_pred)
    test_roc_auc = roc_auc_score(y_test, y_test_score)
    
    # Train metrics
    train_accuracy = accuracy_score(y_train, y_train_pred)
    train_precision = precision_score(y_train, y_train_pred)
    train_recall = recall_score(y_train, y_train_pred)
    train_f1 = f1_score(y_train, y_train_pred)
    train_roc_auc = roc_auc_score(y_train, y_train_score)
    
    # Agregar a la tabla de métricas (test)
    tabla_metricas.loc[len(tabla_metricas)] = [name, 'Test Accuracy', round(test_accuracy, 2)]
    tabla_metricas.loc[len(tabla_metricas)] = [name, 'Test Precision', round(test_precision, 2)]
    tabla_metricas.loc[len(tabla_metricas)] = [name, 'Test Recall', round(test_recall, 2)]
    tabla_metricas.loc[len(tabla_metricas)] = [name, 'Test F1-score', round(test_f1, 2)]
    tabla_metricas.loc[len(tabla_metricas)] = [name, 'Test ROC AUC', round(test_roc_auc, 2)]
    
    # Agregar a la tabla de métricas (train)
    tabla_metricas.loc[len(tabla_metricas)] = [name, 'Train Accuracy', round(train_accuracy, 2)]
    tabla_metricas.loc[len(tabla_metricas)] = [name, 'Train Precision', round(train_precision, 2)]
    tabla_metricas.loc[len(tabla_metricas)] = [name, 'Train Recall', round(train_recall, 2)]
    tabla_metricas.loc[len(tabla_metricas)] = [name, 'Train F1-score', round(train_f1, 2)]
    tabla_metricas.loc[len(tabla_metricas)] = [name, 'Train ROC AUC', round(train_roc_auc, 2)]

# Función para la matriz de confusión
def confusion_matrix_figure(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False, ax=ax)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    return fig

# 1. Cargar el dataset
X_full = df_full.copy()

# 2. Eliminar filas con la variable objetivo nula
X_full.dropna(axis=0, subset=['compra'], inplace=True)
y = X_full['compra']  # Variable objetivo
X_full.drop(['compra'], axis=1, inplace=True)

# 3. Eliminar columnas con más del 80% de valores nulos
porcentaje_nulos = X_full.isnull().mean()
columnas_a_eliminar = porcentaje_nulos[porcentaje_nulos > 0.80].index
X_full = X_full.drop(columns=columnas_a_eliminar)

# 4. Dividir los datos en entrenamiento (60%), validación (20%) y test (20%)
X_train_full, X_temp, y_train, y_temp = train_test_split(X_full, y, train_size=0.6, test_size=0.4, random_state=11, stratify=y)
X_valid_full, X_test_full, y_valid, y_test = train_test_split(X_temp, y_temp, train_size=0.5, test_size=0.5, random_state=11, stratify=y_temp)

# 5. Seleccionar columnas categóricas y numéricas
object_cols = [cname for cname in X_train_full.columns if X_train_full[cname].nunique() < 5 and X_train_full[cname].dtype == "object"]
categorical_cols = [cname for cname in X_train_full.columns if X_train_full[cname].dtype == "category"]
numerical_cols = [cname for cname in X_train_full.columns if X_train_full[cname].dtype in ['int64', 'float64']]
my_cols = categorical_cols + numerical_cols + object_cols
X_train = X_train_full[my_cols].copy()
X_valid = X_valid_full[my_cols].copy()
X_test = X_test_full[my_cols].copy()

# 6. Preprocesamiento de los datos
numerical_transformer = SimpleImputer(strategy='constant')
categorical_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='most_frequent')), ('onehot', OneHotEncoder(handle_unknown='ignore'))])
preprocessor = ColumnTransformer(transformers=[('num', numerical_transformer, numerical_cols), ('cat', categorical_transformer, categorical_cols)])

# 7. Definir modelos a evaluar
models = {
    'RandomForest': RandomForestClassifier(),
    #'XGBoost': XGBClassifier(),
    'LogisticRegression': LogisticRegression(),
    #'SVM': SVC(probability=True)
}

# 8. Entrenar y evaluar cada modelo
tabla_metricas = pd.DataFrame(columns=['Modelo', 'Métrica', 'Valor'])

for name, model in models.items():
    clf = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
    clf.fit(X_train, y_train)
    
    y_pred = clf.predict(X_valid)
    y_pred_proba = clf.predict_proba(X_valid)[:, 1]
    
    accuracy = accuracy_score(y_valid, y_pred)
    roc_auc = roc_auc_score(y_valid, y_pred_proba)
    
    tabla_metricas.loc[len(tabla_metricas)] = [name, 'Accuracy', round(accuracy, 2)]
    tabla_metricas.loc[len(tabla_metricas)] = [name, 'ROC AUC', round(roc_auc, 2)]

# 9. Entrenar y evaluar DecisionTreeClassifier
features = X_train.columns
decision_tree = DecisionTreeClassifier(max_depth=50, min_samples_leaf=350, random_state=42)
decision_tree.fit(X_train[features], y_train)

y_train_pred = decision_tree.predict(X_train[features])
y_train_score = decision_tree.predict_proba(X_train[features])[:, 1]

# Predicción para el conjunto de validación
y_pred_valida = decision_tree.predict(X_valid[features])
y_valida_score = decision_tree.predict_proba(X_valid[features])[:, 1]

# Calcular métricas y agregarlas a la tabla
metricas(y_valid, y_pred_valida, y_valida_score, y_train, y_train_pred, y_train_score, 'DecisionTreeClassifier', tabla_metricas)

# 10. Visualizar la matriz de confusión para el modelo de Decision Tree
fig = confusion_matrix_figure(y_valid, y_pred_valida)
fig.show()

# 11. Mostrar las métricas
print("Tabla de métricas:")
print(tabla_metricas)

"""

'\n\n# Función para calcular métricas y agregar a las tablas\ndef metricas(y_test, y_pred, y_test_score, y_train, y_train_pred, y_train_score, name, tabla_metricas):\n    # Test metrics\n    test_accuracy = accuracy_score(y_test, y_pred)\n    test_precision = precision_score(y_test, y_pred)\n    test_recall = recall_score(y_test, y_pred)\n    test_f1 = f1_score(y_test, y_pred)\n    test_roc_auc = roc_auc_score(y_test, y_test_score)\n    \n    # Train metrics\n    train_accuracy = accuracy_score(y_train, y_train_pred)\n    train_precision = precision_score(y_train, y_train_pred)\n    train_recall = recall_score(y_train, y_train_pred)\n    train_f1 = f1_score(y_train, y_train_pred)\n    train_roc_auc = roc_auc_score(y_train, y_train_score)\n    \n    # Agregar a la tabla de métricas (test)\n    tabla_metricas.loc[len(tabla_metricas)] = [name, \'Test Accuracy\', round(test_accuracy, 2)]\n    tabla_metricas.loc[len(tabla_metricas)] = [name, \'Test Precision\', round(test_precision, 2)]\n  