In [23]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import mlflow
import mlflow.sklearn

# Load the raw dataset from the Excel workbook that sits next to the notebook.
df = pd.read_excel(io="datos_utiles.xlsx")


In [24]:
# Target vector: the scholarship-eligibility flag column.
y = df["es_elegible_beca"]
# Predictor matrix: every remaining column of the frame.
X = df.drop(columns=["es_elegible_beca"])

In [25]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split

# Partition the predictor columns by dtype so each group gets its own transformer.
# NOTE(review): columns whose dtype is neither int64/float64 nor object are not
# listed in any transformer below - confirm that no such columns exist in X.
columnas_num = X.select_dtypes(include=['int64', 'float64']).columns
columnas_cat = X.select_dtypes(include=['object']).columns
numericas = columnas_num.tolist()
categoricas = columnas_cat.tolist()

# Preprocessing: standardise the numeric columns and one-hot encode the
# categorical ones (categories unseen during fit are ignored at transform time).
paso_numerico = ('num', StandardScaler(), numericas)
paso_categorico = ('cat', OneHotEncoder(handle_unknown='ignore'), categoricas)
preprocesamiento = ColumnTransformer(transformers=[paso_numerico, paso_categorico])

# Data split: 70 % train / 30 % test, stratified on the target, fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

In [26]:
# MLflow configuration: point the client at the local tracking server, then
# select the experiment that the model-comparison runs are logged under.
mlflow.set_tracking_uri(uri="http://localhost:5000")
experiment = mlflow.set_experiment(experiment_name="becas_elegibilidad_modelos")

In [27]:
# Hyperparameters shared by every candidate network; only the hidden-layer
# layout differs between candidates.
_mlp_comunes = dict(
    activation='relu',
    max_iter=300,
    batch_size=256,
    solver='adam',
    early_stopping=True,
    random_state=42,
)

# Display name -> hidden layer sizes, in the order the candidates are trained.
_arquitecturas = {
    "Red Neuronal (100,)": (100,),
    "Red Neuronal (100,50)": (100, 50),
    "Red Neuronal (128, 64, 32)": (128, 64, 32),
    "Red Neuronal (100, 50, 25)": (100, 50, 25),
}

# Candidate models keyed by the name used for the MLflow run.
modelos = {
    nombre: MLPClassifier(hidden_layer_sizes=capas, **_mlp_comunes)
    for nombre, capas in _arquitecturas.items()
}

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
# Train, evaluate and log every candidate model, one MLflow run per model.
for nombre_modelo, modelo in modelos.items():
    with mlflow.start_run(experiment_id=experiment.experiment_id, run_name=nombre_modelo):
        # Chain preprocessing and estimator so the logged artifact accepts the
        # same raw columns as X_train.
        pipeline = Pipeline(steps=[
            ('preprocesamiento', preprocesamiento),
            ('modelo', modelo)
        ])
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)

        # Metrics computed once on the held-out test split.
        acc = accuracy_score(y_test, y_pred)
        # Fix: "precision" used to be logged with accuracy_score, so the
        # tracked value was just a copy of accuracy.
        precision = precision_score(y_test, y_pred, zero_division=0)
        recall = recall_score(y_test, y_pred, zero_division=0)
        cm = confusion_matrix(y_test, y_pred)
        reporte = classification_report(y_test, y_pred)

        print(f"\n📘 {nombre_modelo}")
        print("Matriz de Confusión:\n", cm)
        print("Reporte de Clasificación:\n", reporte)

        mlflow.log_param("modelo", nombre_modelo)
        mlflow.log_metric("accuracy", acc)
        mlflow.log_metric("precision", precision)
        mlflow.log_metric("recall", recall)
        # Artifact name is derived from the run name (spaces -> underscores, lowercased).
        mlflow.sklearn.log_model(pipeline, f"modelo_{nombre_modelo.replace(' ', '_').lower()}")


📘 Red Neuronal (100,)
Matriz de Confusión:
 [[161091    105]
 [   132   5906]]
Reporte de Clasificación:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    161196
           1       0.98      0.98      0.98      6038

    accuracy                           1.00    167234
   macro avg       0.99      0.99      0.99    167234
weighted avg       1.00      1.00      1.00    167234





🏃 View run Red Neuronal (100,) at: http://localhost:5000/#/experiments/645130807814833189/runs/9d451f75daec4723b6f6b33034aca293
🧪 View experiment at: http://localhost:5000/#/experiments/645130807814833189

📘 Red Neuronal (100,50)
Matriz de Confusión:
 [[161056    140]
 [   211   5827]]
Reporte de Clasificación:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    161196
           1       0.98      0.97      0.97      6038

    accuracy                           1.00    167234
   macro avg       0.99      0.98      0.98    167234
weighted avg       1.00      1.00      1.00    167234





🏃 View run Red Neuronal (100,50) at: http://localhost:5000/#/experiments/645130807814833189/runs/710b363ce01245578ddc4ce1ee344c28
🧪 View experiment at: http://localhost:5000/#/experiments/645130807814833189


In [17]:
# Final model
# MLflow configuration: same local tracking server, but runs are logged under
# a separate experiment. Note that this rebinds `experiment`.
mlflow.set_tracking_uri(uri="http://localhost:5000")
experiment = mlflow.set_experiment(experiment_name="modelo_final")

# The single network selected as the final model.
_red_final = MLPClassifier(
    hidden_layer_sizes=(100, 50, 25),
    activation='relu',
    max_iter=300,
    batch_size=256,
    solver='adam',
    early_stopping=True,
    random_state=42,
)
modelo_final = {"Red Neuronal (100, 50, 25)": _red_final}

2025/05/25 12:30:33 INFO mlflow.tracking.fluent: Experiment with name 'modelo_final' does not exist. Creating a new experiment.


In [18]:
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
# Train, evaluate and log the final model, one MLflow run per dict entry.
for nombre_modelo, modelo in modelo_final.items():
    with mlflow.start_run(experiment_id=experiment.experiment_id, run_name=nombre_modelo):
        # Chain preprocessing and estimator so the logged artifact accepts the
        # same raw columns as X_train.
        pipeline = Pipeline(steps=[
            ('preprocesamiento', preprocesamiento),
            ('modelo', modelo)
        ])
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)

        # Metrics computed once on the held-out test split.
        acc = accuracy_score(y_test, y_pred)
        # Fix: "precision" used to be logged with accuracy_score, so the
        # tracked value was just a copy of accuracy.
        precision = precision_score(y_test, y_pred, zero_division=0)
        recall = recall_score(y_test, y_pred, zero_division=0)
        cm = confusion_matrix(y_test, y_pred)
        reporte = classification_report(y_test, y_pred)

        print(f"\n📘 {nombre_modelo}")
        print("Matriz de Confusión:\n", cm)
        print("Reporte de Clasificación:\n", reporte)

        mlflow.log_param("modelo", nombre_modelo)
        mlflow.log_metric("accuracy", acc)
        mlflow.log_metric("precision", precision)
        mlflow.log_metric("recall", recall)
        # Artifact name is derived from the run name (spaces -> underscores, lowercased).
        mlflow.sklearn.log_model(pipeline, f"modelo_{nombre_modelo.replace(' ', '_').lower()}")


📘 Red Neuronal (128, 64, 32)
Matriz de Confusión:
 [[162187    159]
 [   129   5912]]
Reporte de Clasificación:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    162346
           1       0.97      0.98      0.98      6041

    accuracy                           1.00    168387
   macro avg       0.99      0.99      0.99    168387
weighted avg       1.00      1.00      1.00    168387





🏃 View run Red Neuronal (128, 64, 32) at: http://localhost:5000/#/experiments/991763802603381987/runs/c0b2e6561482421d977fa0da288c9aec
🧪 View experiment at: http://localhost:5000/#/experiments/991763802603381987


In [22]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import EarlyStopping

# Apply preprocessing: fit the transformer on the training split only, then
# reuse the fitted transformer for the test split.
X_train_proc = preprocesamiento.fit_transform(X_train)
X_test_proc = preprocesamiento.transform(X_test)

# Define the sequential model: three ReLU hidden layers (100-50-25) and one
# sigmoid output unit. An explicit Input layer replaces the `input_shape=`
# argument on the first Dense layer, which Keras warns about for Sequential
# models.
modelo = Sequential([
    tf.keras.Input(shape=(X_train_proc.shape[1],)),
    Dense(100, activation='relu'),
    Dense(50, activation='relu'),
    Dense(25, activation='relu'),
    Dense(1, activation='sigmoid')  # For binary classification
])

# Compile the model
modelo.compile(optimizer='adam',
               loss='binary_crossentropy',
               metrics=['accuracy'])

# Stop once validation loss has not improved for 5 epochs and roll back to the
# best weights seen.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model. Fix: the callback above was created but never handed to
# fit(), so training always ran for the full 100 epochs.
historial = modelo.fit(X_train_proc, y_train,
                       epochs=100,
                       batch_size=32,
                       validation_split=0.2,
                       callbacks=[early_stop],
                       verbose=1)

# Evaluation on the held-out test split
loss, accuracy = modelo.evaluate(X_test_proc, y_test)
print(f"Accuracy en test: {accuracy:.4f}")

# Predictions and decision threshold: the model returns one probability per
# row as an (n, 1) column; threshold at 0.5 and flatten to 1-D so the sklearn
# metrics receive the same shape as y_test.
y_pred_prob = modelo.predict(X_test_proc)
y_pred = (y_pred_prob > 0.5).astype("int").ravel()

# Metrics report
from sklearn.metrics import classification_report, confusion_matrix
print("Matriz de confusión:\n", confusion_matrix(y_test, y_pred))
print("Reporte de clasificación:\n", classification_report(y_test, y_pred))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m9823/9823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - accuracy: 0.9892 - loss: 0.0325 - val_accuracy: 0.9957 - val_loss: 0.0106
Epoch 2/100
[1m9823/9823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 2ms/step - accuracy: 0.9956 - loss: 0.0101 - val_accuracy: 0.9945 - val_loss: 0.0141
Epoch 3/100
[1m9823/9823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2ms/step - accuracy: 0.9965 - loss: 0.0083 - val_accuracy: 0.9968 - val_loss: 0.0085
Epoch 4/100
[1m9823/9823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2ms/step - accuracy: 0.9969 - loss: 0.0072 - val_accuracy: 0.9973 - val_loss: 0.0077
Epoch 5/100
[1m9823/9823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 2ms/step - accuracy: 0.9975 - loss: 0.0060 - val_accuracy: 0.9967 - val_loss: 0.0081
Epoch 6/100
[1m9823/9823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 2ms/step - accuracy: 0.9974 - loss: 0.0059 - val_accuracy: 0.9967 - val_loss: 0.0086
Epoc