In [2]:
import sys
sys.path.insert(0, r'C:\Users\Administrator\Documents\Proyecto seg clientes refuerzo\myenv\Lib\site-packages')

import optuna
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dropout
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt

# Cargar los datos como en tu código original
# Ruta del archivo
ruta = r'C:/Users/Administrator/Documents/Proyecto seg clientes refuerzo/Proyecto_seg_clientes/Data/teleCust1000t_listo.csv'

# Cargar el dataset
df = pd.read_csv(ruta)

# Ver las primeras filas
print(df.head())
print(df.info())

# Separar las características (X) y la variable objetivo (y)
X = df.drop('custcat', axis=1)  # Reemplaza 'custcat' por tu variable objetivo
y = df['custcat']

# Dividir el dataset en conjunto de entrenamiento, validación y test
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Escalar las características
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Codificar la variable objetivo (One-Hot Encoding)
y_train_encoded = to_categorical(y_train - 1)  # Ajustar para que las clases empiecen en 0
y_val_encoded = to_categorical(y_val - 1)
y_test_encoded = to_categorical(y_test - 1)

# Definir el EarlyStopping como variable global
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

def create_model(trial):
    # Optimización de hiperparámetros
    # Definir el número de neuronas por capa
    num_neurons_1 = trial.suggest_int('num_neurons_1', 32, 128)
    num_neurons_2 = trial.suggest_int('num_neurons_2', 16, 64)
    
    # Regularización L2
    l2_reg = trial.suggest_loguniform('l2_reg', 1e-5, 1e-2)
    
    # Tasa de aprendizaje
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
    
    # Crear el modelo
    model = Sequential([
        Dense(num_neurons_1, activation='elu', kernel_regularizer=l2(l2_reg), input_dim=X_train_scaled.shape[1]),
        Dense(num_neurons_2, activation='elu', kernel_regularizer=l2(l2_reg)),
        Dense(y_train_encoded.shape[1], activation='softmax')
    ])
    
    # Compilar el modelo
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    
    return model

def objective(trial):
    # Crear el modelo con los hiperparámetros sugeridos
    model = create_model(trial)
    
    # Entrenar el modelo
    history = model.fit(
        X_train_scaled, y_train_encoded,
        epochs=200,
        batch_size=8,
        validation_data=(X_val_scaled, y_val_encoded),
        callbacks=[early_stopping],
        verbose=0
    )
    
    # Obtener la exactitud en el conjunto de validación
    val_accuracy = history.history['val_accuracy'][-1]
    
    return val_accuracy  # Maximizar la exactitud en validación

# Crear el estudio de Optuna
study = optuna.create_study(direction='maximize')

# Ejecutar la optimización
study.optimize(objective, n_trials=50)

# Imprimir los mejores hiperparámetros encontrados
print("Mejores hiperparámetros:")
print(study.best_params)

# Evaluar el modelo con los mejores hiperparámetros en el conjunto de test
best_model = create_model(study.best_trial)
best_model.fit(
    X_train_scaled, y_train_encoded,
    epochs=200,
    batch_size=8,
    validation_data=(X_val_scaled, y_val_encoded),
    callbacks=[early_stopping],
    verbose=1
)

# Evaluar en test
test_loss, test_accuracy = best_model.evaluate(X_test_scaled, y_test_encoded, verbose=0)
print(f"Exactitud en test con los mejores hiperparámetros: {test_accuracy:.4f}")


[I 2024-11-29 13:56:48,276] A new study created in memory with name: no-name-9b4aff23-c135-4371-b757-6d5e402da6a3


   region  tenure  age  marital  address  income  ed  employ  retire  gender  \
0       2      13   44        1        9    64.0   4       5       0       0   
1       3      11   33        1        7   136.0   5       5       0       0   
2       3      68   52        1       24   116.0   1      29       0       1   
3       2      33   33        0       12    33.0   2       0       0       1   
4       2      23   30        1        9    30.0   1       2       0       0   

   reside  custcat  
0       2        1  
1       6        4  
2       2        3  
3       1        1  
4       4        3  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 12 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   region   1000 non-null   int64  
 1   tenure   1000 non-null   int64  
 2   age      1000 non-null   int64  
 3   marital  1000 non-null   int64  
 4   address  1000 non-null   int64  
 5   income   1000 no

  l2_reg = trial.suggest_loguniform('l2_reg', 1e-5, 1e-2)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
[I 2024-11-29 13:56:59,952] Trial 0 finished with value: 0.4333333373069763 and parameters: {'num_neurons_1': 100, 'num_neurons_2': 54, 'l2_reg': 1.0370190562492336e-05, 'learning_rate': 5.746171419665049e-05}. Best is trial 0 with value: 0.4333333373069763.
[I 2024-11-29 13:57:17,726] Trial 1 finished with value: 0.40666666626930237 and parameters: {'num_neurons_1': 74, 'num_neurons_2': 57, 'l2_reg': 0.0024684914244041518, 'learning_rate': 0.0016821641867665563}. Best is trial 0 with value: 0.4333333373069763.
[I 2024-11-29 13:57:29,176] Trial 2 finished with value: 0.3933333456516266 and parameters: {'num_neurons_1': 95, 'num_neurons_2': 24, 'l2_reg': 0.002561166192806515, 'learning_rate': 0.001548700664237779}. Best is trial 0 with value: 0.4333333373069763.
[I 2024-11-29 13:58:15,181

Mejores hiperparámetros:
{'num_neurons_1': 100, 'num_neurons_2': 54, 'l2_reg': 1.0370190562492336e-05, 'learning_rate': 5.746171419665049e-05}
Epoch 1/200
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.3242 - loss: 1.3991 - val_accuracy: 0.3467 - val_loss: 1.3774
Epoch 2/200
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3600 - loss: 1.3418 - val_accuracy: 0.3400 - val_loss: 1.3478
Epoch 3/200
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3719 - loss: 1.3125 - val_accuracy: 0.3667 - val_loss: 1.3309
Epoch 4/200
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3580 - loss: 1.2875 - val_accuracy: 0.3600 - val_loss: 1.3171
Epoch 5/200
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4278 - loss: 1.2492 - val_accuracy: 0.3800 - val_loss: 1.3098
Epoch 6/200
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━