# 04 Entrenamiento de Modelos

En esta sección, vamos a entrenar varios modelos de machine learning para predecir si un cliente suscribirá un depósito a plazo. Probaremos diferentes algoritmos, ajustaremos sus hiperparámetros y evaluaremos su rendimiento.

## 1. Cargar Datos Preprocesados y Construidos

In [4]:
## 1. Cargar el Conjunto de Datos
# Import the required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib

# Select which balancing variant of the dataset to train on
tipo_balanceo = 'oversampled'  # 'original', 'oversampled', 'undersampled'

# Directory holding the CSV splits for every supported variant
dir_por_balanceo = {
    'original': '../data/interim',
    'oversampled': '../data/processed/oversampled',
    'undersampled': '../data/processed/undersampled',
}

# Fail fast on an unsupported value: without this check dir_path would stay
# undefined and the problem would only surface as a NameError when loading.
if tipo_balanceo not in dir_por_balanceo:
    raise ValueError(
        f"Unknown tipo_balanceo {tipo_balanceo!r}; "
        f"expected one of {sorted(dir_por_balanceo)}"
    )
dir_path = dir_por_balanceo[tipo_balanceo]

# Load the train/test feature and target splits
X_train = pd.read_csv(f'{dir_path}/X_train.csv')
X_test = pd.read_csv(f'{dir_path}/X_test.csv')
y_train = pd.read_csv(f'{dir_path}/y_train.csv')
y_test = pd.read_csv(f'{dir_path}/y_test.csv')

# NOTE(review): the recorded reports in this notebook show an almost perfectly
# balanced test set (9985 vs 9976 rows). If the test split was resampled as
# well, the metrics will not reflect the real class distribution - confirm
# against the data-preparation step.

# Show the first rows of the training features
X_train.head()

Unnamed: 0,age,default,balance,housing,loan,day,duration,campaign,pdays,previous,...,month_jul,month_jun,month_mar,month_may,month_nov,month_oct,month_sep,poutcome_other,poutcome_success,poutcome_unknown
0,0.168831,0,0.072803,1,0,0.5,0.214721,0.016129,0.0,0.0,...,True,False,False,False,False,False,False,False,False,True
1,0.285714,0,0.089527,0,0,0.466667,0.011387,0.0,0.0,0.0,...,False,False,False,True,False,False,False,False,False,True
2,0.272727,0,0.071024,1,0,0.233333,0.025824,0.016129,0.0,0.0,...,False,False,False,True,False,False,False,False,False,True
3,0.210909,0,0.07474,0,0,0.966667,0.04884,0.0147,0.0,0.0,...,False,False,False,False,False,False,False,False,False,True
4,0.194805,0,0.074901,1,0,0.033333,0.094754,0.0,0.0,0.0,...,False,True,False,False,False,False,False,False,False,True


## 2. Entrenamiento de Modelos

### 2.1. Entrenamiento de un Modelo de Regresión Logística

In [5]:
from sklearn.linear_model import LogisticRegression

# Build the logistic-regression classifier (fixed seed) and fit it in one
# chained call; fit() returns the estimator itself. The target frame is
# flattened to a 1-D array before fitting.
lr = LogisticRegression(random_state=8).fit(X_train, y_train.to_numpy().ravel())

# Predict labels for the test set
y_pred_lr = lr.predict(X_test)

# Print accuracy, confusion matrix and per-class report, in that order
for report_label, report_fn in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
):
    print(report_label, report_fn(y_test, y_pred_lr))

# Persist the fitted model to disk
joblib.dump(lr, '../models/lr_model.pkl')

Accuracy: 0.8509593707730073
Confusion Matrix:
 [[8538 1447]
 [1528 8448]]
Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.86      0.85      9985
           1       0.85      0.85      0.85      9976

    accuracy                           0.85     19961
   macro avg       0.85      0.85      0.85     19961
weighted avg       0.85      0.85      0.85     19961



['../models/lr_model.pkl']

### 2.2. Entrenamiento de un Modelo de Árbol de Decisión

In [6]:
from sklearn.tree import DecisionTreeClassifier

# Build the decision-tree classifier (fixed seed) and fit it in one chained
# call; fit() returns the estimator itself. The target frame is flattened to
# a 1-D array before fitting.
dt = DecisionTreeClassifier(random_state=8).fit(X_train, y_train.to_numpy().ravel())

# Predict labels for the test set
y_pred_dt = dt.predict(X_test)

# Print accuracy, confusion matrix and per-class report, in that order
for report_label, report_fn in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
):
    print(report_label, report_fn(y_test, y_pred_dt))

# Persist the fitted model to disk
joblib.dump(dt, '../models/dt_model.pkl')

Accuracy: 0.8914884023846501
Confusion Matrix:
 [[8837 1148]
 [1018 8958]]
Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.89      0.89      9985
           1       0.89      0.90      0.89      9976

    accuracy                           0.89     19961
   macro avg       0.89      0.89      0.89     19961
weighted avg       0.89      0.89      0.89     19961



['../models/dt_model.pkl']

### 2.3. Entrenamiento de un Modelo de Bosque Aleatorio

In [7]:
from sklearn.ensemble import RandomForestClassifier

# Build the random-forest classifier (fixed seed) and fit it in one chained
# call; fit() returns the estimator itself. The target frame is flattened to
# a 1-D array before fitting.
rf = RandomForestClassifier(random_state=8).fit(X_train, y_train.to_numpy().ravel())

# Predict labels for the test set
y_pred_rf = rf.predict(X_test)

# Print accuracy, confusion matrix and per-class report, in that order
for report_label, report_fn in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
):
    print(report_label, report_fn(y_test, y_pred_rf))

# Persist the fitted model to disk
joblib.dump(rf, '../models/rf_model.pkl')

Accuracy: 0.9384800360703371
Confusion Matrix:
 [[9067  918]
 [ 310 9666]]
Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.91      0.94      9985
           1       0.91      0.97      0.94      9976

    accuracy                           0.94     19961
   macro avg       0.94      0.94      0.94     19961
weighted avg       0.94      0.94      0.94     19961



['../models/rf_model.pkl']

### 2.4. Entrenamiento de un Modelo de Gradient Boosting

In [8]:
from sklearn.ensemble import GradientBoostingClassifier

# Build the gradient-boosting classifier (fixed seed) and fit it in one
# chained call; fit() returns the estimator itself. The target frame is
# flattened to a 1-D array before fitting.
gb = GradientBoostingClassifier(random_state=8).fit(X_train, y_train.to_numpy().ravel())

# Predict labels for the test set
y_pred_gb = gb.predict(X_test)

# Print accuracy, confusion matrix and per-class report, in that order
for report_label, report_fn in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
):
    print(report_label, report_fn(y_test, y_pred_gb))

# Persist the fitted model to disk
joblib.dump(gb, '../models/gb_model.pkl')

Accuracy: 0.9042633134612494
Confusion Matrix:
 [[8866 1119]
 [ 792 9184]]
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.89      0.90      9985
           1       0.89      0.92      0.91      9976

    accuracy                           0.90     19961
   macro avg       0.90      0.90      0.90     19961
weighted avg       0.90      0.90      0.90     19961



['../models/gb_model.pkl']

### 2.5. Entrenamiento de un Modelo de K-Nearest Neighbors

In [9]:
from sklearn.neighbors import KNeighborsClassifier

# Build the k-nearest-neighbours classifier with its default settings and fit
# it in one chained call; fit() returns the estimator itself. The target
# frame is flattened to a 1-D array before fitting.
knn = KNeighborsClassifier().fit(X_train, y_train.to_numpy().ravel())

# Predict labels for the test set
y_pred_knn = knn.predict(X_test)

# Print accuracy, confusion matrix and per-class report, in that order
for report_label, report_fn in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
):
    print(report_label, report_fn(y_test, y_pred_knn))

# Persist the fitted model to disk
joblib.dump(knn, '../models/knn_model.pkl')

Accuracy: 0.8856269725965633
Confusion Matrix:
 [[8302 1683]
 [ 600 9376]]
Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.83      0.88      9985
           1       0.85      0.94      0.89      9976

    accuracy                           0.89     19961
   macro avg       0.89      0.89      0.89     19961
weighted avg       0.89      0.89      0.89     19961



['../models/knn_model.pkl']

### 2.6. Entrenamiento de un Modelo de Support Vector Machine (demora mucho)

In [11]:
from sklearn.svm import SVC

# Build the support-vector classifier (fixed seed) and fit it in one chained
# call; fit() returns the estimator itself. The target frame is flattened to
# a 1-D array before fitting.
# NOTE: probability=True enables probability estimates; per the scikit-learn
# documentation this adds an internal cross-validation to fit(), which
# contributes to the long training time of this cell.
svc = SVC(probability=True, random_state=8).fit(X_train, y_train.to_numpy().ravel())

# Predict labels for the test set
y_pred_svc = svc.predict(X_test)

# Print accuracy, confusion matrix and per-class report, in that order
for report_label, report_fn in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
):
    print(report_label, report_fn(y_test, y_pred_svc))

# Persist the fitted model to disk
joblib.dump(svc, '../models/svc_model.pkl')

### 2.7. Entrenamiento de un Modelo de Naive Bayes

In [10]:
from sklearn.naive_bayes import GaussianNB

# Build the Gaussian naive-Bayes classifier with its default settings and fit
# it in one chained call; fit() returns the estimator itself. The target
# frame is flattened to a 1-D array before fitting.
nb = GaussianNB().fit(X_train, y_train.to_numpy().ravel())

# Predict labels for the test set
y_pred_nb = nb.predict(X_test)

# Print accuracy, confusion matrix and per-class report, in that order
for report_label, report_fn in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
):
    print(report_label, report_fn(y_test, y_pred_nb))

# Persist the fitted model to disk
joblib.dump(nb, '../models/nb_model.pkl')

Accuracy: 0.7104353489304143
Confusion Matrix:
 [[8589 1396]
 [4384 5592]]
Classification Report:
               precision    recall  f1-score   support

           0       0.66      0.86      0.75      9985
           1       0.80      0.56      0.66      9976

    accuracy                           0.71     19961
   macro avg       0.73      0.71      0.70     19961
weighted avg       0.73      0.71      0.70     19961



['../models/nb_model.pkl']

### 2.8. Entrenamiento de un Modelo de Red Neuronal

In [11]:
from sklearn.neural_network import MLPClassifier

# Build the multi-layer-perceptron classifier (fixed seed) and fit it in one
# chained call; fit() returns the estimator itself. The target frame is
# flattened to a 1-D array before fitting.
mlp = MLPClassifier(random_state=8).fit(X_train, y_train.to_numpy().ravel())

# Predict labels for the test set
y_pred_mlp = mlp.predict(X_test)

# Print accuracy, confusion matrix and per-class report, in that order
for report_label, report_fn in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
):
    print(report_label, report_fn(y_test, y_pred_mlp))

# Persist the fitted model to disk
joblib.dump(mlp, '../models/mlp_model.pkl')

Accuracy: 0.9019588196984119
Confusion Matrix:
 [[8775 1210]
 [ 747 9229]]
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.88      0.90      9985
           1       0.88      0.93      0.90      9976

    accuracy                           0.90     19961
   macro avg       0.90      0.90      0.90     19961
weighted avg       0.90      0.90      0.90     19961





['../models/mlp_model.pkl']