Importar Librerias

In [12]:
import pandas as pd
import numpy as np
import optuna
import mlflow
import mlflow.keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam



  from .autonotebook import tqdm as notebook_tqdm


Generar Variable Objetivo

In [16]:
# Carga del CSV
df = pd.read_csv("data2016.csv")
df.columns = df.columns.str.strip().str.lower().str.replace(" ", "_")

# Variable objetivo
materias = [
    'punt_matematicas', 'punt_lectura_critica',
    'punt_ingles', 'punt_c_naturales', 'punt_sociales_ciudadanas'
]
df['materia_menor_puntaje'] = df[materias].idxmin(axis=1)

# Features posibles
features_all = [
    'estu_genero', 'fami_educacionmadre', 'fami_educacionpadre',
    'fami_estratovivienda', 'fami_personashogar', 'fami_cuartoshogar',
    'fami_tieneautomovil', 'fami_tienecomputador',
    'fami_tieneinternet', 'fami_tienelavadora'
]

In [41]:
def objective(trial):
    # ----------------------------------
    # 🔹 Selección de features (como antes)
    selected_features = [f for f in features_all if trial.suggest_categorical(f"usar_" + f, [True, False])]
    if not selected_features:
        return 0

    X = df[selected_features].copy()
    y = df['materia_menor_puntaje']

    for col in X.select_dtypes(include='object').columns:
        X[col] = LabelEncoder().fit_transform(X[col].astype(str))

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    y_encoded = LabelEncoder().fit_transform(y)

    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

    # ----------------------------------
    # 🔹 Hiperparámetros de red neuronal
    n_layers = trial.suggest_int("n_layers", 1, 4)
    activation = trial.suggest_categorical("activation", ["relu", "tanh", "elu"])
    dropout_rate = trial.suggest_float("dropout", 0.2, 0.5)
    learning_rate = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64])

    # Cantidad de neuronas por capa
    hidden_units = [trial.suggest_int(f"n_units_{i}", 32, 128) for i in range(n_layers)]

    # ----------------------------------
    # 🔹 Modelo dinámico
    model = Sequential()
    model.add(Dense(hidden_units[0], activation=activation, input_shape=(X_train.shape[1],)))
    model.add(Dropout(dropout_rate))

    for i in range(1, n_layers):
        model.add(Dense(hidden_units[i], activation=activation))
        model.add(Dropout(dropout_rate))

    model.add(Dense(5, activation='softmax'))

    # ----------------------------------
    # 🔹 Compilación y entrenamiento
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    with mlflow.start_run(nested=True):
        mlflow.log_params({
            "n_layers": n_layers,
            "activation": activation,
            "dropout": dropout_rate,
            "learning_rate": learning_rate,
            "batch_size": batch_size,
            "features": selected_features,
            **{f"n_units_{i}": hidden_units[i] for i in range(n_layers)}
        })

        model.fit(X_train, y_train, validation_split=0.2, epochs=20, batch_size=batch_size, verbose=0)
        loss, acc = model.evaluate(X_test, y_test, verbose=0)

        mlflow.log_metric("accuracy", acc)
        mlflow.keras.log_model(model, "modelo_keras")

    return acc


In [42]:
mlflow.set_experiment("icfes_red_neuronal_optuna")

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=10)

[I 2025-05-24 21:49:10,110] A new study created in memory with name: no-name-ba7b1ca7-cf42-4c07-bb1e-d9308996c7a4

Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.

[W 2025-05-24 21:50:15,432] Trial 0 failed with parameters: {'usar_estu_genero': True, 'usar_fami_educacionmadre': True, 'usar_fami_educacionpadre': False, 'usar_fami_estratovivienda': False, 'usar_fami_personashogar': False, 'usar_fami_cuartoshogar': False, 'usar_fami_tieneautomovil': True, 'usar_fami_tienecomputador': True, 'usar_fami_tieneinternet': False, 'usar_fami_tienelavadora': False, 'n_layers': 2, 'activation': 'tanh', 'dropout': 0.357997666082717, 'lr': 0.0006528897872003062, 'batch_size': 32, 'n_units_0': 94, 'n_units_1': 113} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\ASUS\AppData\Local\Programs\Python\Python311\Lib\site-packages\

KeyboardInterrupt: 

In [20]:
print("Mejor accuracy:", study.best_value)
print("Mejores hiperparámetros:", study.best_params)


Mejor accuracy: 0.3122991919517517
Mejores hiperparámetros: {'usar_estu_genero': True, 'usar_fami_educacionmadre': False, 'usar_fami_educacionpadre': True, 'usar_fami_estratovivienda': True, 'usar_fami_personashogar': True, 'usar_fami_cuartoshogar': True, 'usar_fami_tieneautomovil': False, 'usar_fami_tienecomputador': False, 'usar_fami_tieneinternet': False, 'usar_fami_tienelavadora': False, 'n_units1': 86, 'n_units2': 25, 'dropout': 0.3975898240180797, 'lr': 0.0005149688462933546}


In [30]:
!pip uninstall -y nbformat && pip install nbformat==5.9.2


Found existing installation: nbformat 5.10.4
Uninstalling nbformat-5.10.4:
  Successfully uninstalled nbformat-5.10.4
Collecting nbformat==5.9.2
  Downloading nbformat-5.9.2-py3-none-any.whl.metadata (3.4 kB)
Downloading nbformat-5.9.2-py3-none-any.whl (77 kB)
   ---------------------------------------- 0.0/77.6 kB ? eta -:--:--
   ---------- ----------------------------- 20.5/77.6 kB 640.0 kB/s eta 0:00:01
   ---------------------------------------- 77.6/77.6 kB 1.4 MB/s eta 0:00:00
Installing collected packages: nbformat
Successfully installed nbformat-5.9.2



[notice] A new release of pip is available: 24.0 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [35]:
import optuna.visualization as vis

fig = vis.plot_param_importances(study)
fig.write_html("importancia_variables.html")

In [38]:
import optuna.visualization as vis

# Mostrar evolución de accuracy
fig_history = vis.plot_optimization_history(study)
fig_history.write_html("accuracy_vs_trial.html")

In [39]:
print("🔍 Mejor accuracy alcanzado:", study.best_value)
print("\n🏆 Hiperparámetros óptimos encontrados:")
for key, value in study.best_params.items():
    print(f"  {key}: {value}")

🔍 Mejor accuracy alcanzado: 0.3122991919517517

🏆 Hiperparámetros óptimos encontrados:
  usar_estu_genero: True
  usar_fami_educacionmadre: False
  usar_fami_educacionpadre: True
  usar_fami_estratovivienda: True
  usar_fami_personashogar: True
  usar_fami_cuartoshogar: True
  usar_fami_tieneautomovil: False
  usar_fami_tienecomputador: False
  usar_fami_tieneinternet: False
  usar_fami_tienelavadora: False
  n_units1: 86
  n_units2: 25
  dropout: 0.3975898240180797
  lr: 0.0005149688462933546
