In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import mlflow
import mlflow.sklearn

# Load the dataset from the Excel workbook; the relative path is resolved
# against the notebook's current working directory.
df = pd.read_excel("datos_utiles.xlsx")


In [2]:
# Target vector: the "es_elegible_beca" column.
y = df["es_elegible_beca"]
# Feature matrix: every remaining column of the frame.
X = df.drop(columns=["es_elegible_beca"])

In [4]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split

# Split the feature columns by dtype so each group gets its own transformer.
# NOTE(review): columns whose dtype is none of int64/float64/object end up in
# neither list; with ColumnTransformer's default `remainder` they would be
# dropped from the model input -- confirm that is intended.
numericas = list(X.select_dtypes(include=['int64', 'float64']).columns)
categoricas = list(X.select_dtypes(include=['object']).columns)

# Standardise the numeric columns and one-hot encode the categorical ones.
# handle_unknown='ignore' keeps transform() from failing on categories that
# were not present when the encoder was fitted.
preprocesamiento = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numericas),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categoricas),
    ]
)

# Stratified 70/30 train/test split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

In [5]:
# MLflow configuration: point the client at the tracking server on localhost
# and select the experiment under which the model-comparison runs are logged.
# `experiment` is kept because the training loop reads its experiment_id.
mlflow.set_tracking_uri("http://localhost:5000")
experiment = mlflow.set_experiment("becas_elegibilidad_modelos")

In [15]:
# Candidate neural networks keyed by display name. The candidates differ only
# in their hidden-layer layout; every other hyperparameter is shared.
# The display names are also used as MLflow run names, so they are spelled out
# literally rather than derived from the tuples.
modelos = {
    nombre: MLPClassifier(
        hidden_layer_sizes=capas_ocultas,
        activation='relu',
        solver='adam',
        batch_size=256,
        max_iter=300,
        early_stopping=True,
        random_state=42,
    )
    for nombre, capas_ocultas in (
        ("Red Neuronal (100,)", (100,)),
        ("Red Neuronal (100,50)", (100, 50)),
        ("Red Neuronal (128, 64, 32)", (128, 64, 32)),
    )
}

In [16]:
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
# Train every candidate model inside its own MLflow run, print its test-set
# evaluation, and log the model name, metrics and fitted pipeline to MLflow.
for nombre_modelo, modelo in modelos.items():
    with mlflow.start_run(experiment_id=experiment.experiment_id, run_name=nombre_modelo):
        # Preprocessing lives inside the pipeline, so it is fitted on the
        # training split only and re-applied unchanged to the test split.
        pipeline = Pipeline(steps=[
            ('preprocesamiento', preprocesamiento),
            ('modelo', modelo)
        ])
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)

        acc = accuracy_score(y_test, y_pred)
        # Fix: the "precision" metric was previously computed with
        # accuracy_score, so MLflow stored a duplicate of the accuracy.
        # zero_division=0 mirrors the recall call below.
        precision = precision_score(y_test, y_pred, zero_division=0)
        recall = recall_score(y_test, y_pred, zero_division=0)
        cm = confusion_matrix(y_test, y_pred)
        reporte = classification_report(y_test, y_pred)

        print(f"\n📘 {nombre_modelo}")
        print("Matriz de Confusión:\n", cm)
        print("Reporte de Clasificación:\n", reporte)

        mlflow.log_param("modelo", nombre_modelo)
        mlflow.log_metric("accuracy", acc)
        mlflow.log_metric("precision", precision)
        mlflow.log_metric("recall", recall)
        # Artifact path is derived from the display name (spaces -> "_", lower-cased).
        mlflow.sklearn.log_model(pipeline, f"modelo_{nombre_modelo.replace(' ', '_').lower()}")


📘 Red Neuronal (100,)
Matriz de Confusión:
 [[162273     73]
 [   159   5882]]
Reporte de Clasificación:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    162346
           1       0.99      0.97      0.98      6041

    accuracy                           1.00    168387
   macro avg       0.99      0.99      0.99    168387
weighted avg       1.00      1.00      1.00    168387





🏃 View run Red Neuronal (100,) at: http://localhost:5000/#/experiments/645130807814833189/runs/ce9b2891a67949eb9a61e8b94aa2db31
🧪 View experiment at: http://localhost:5000/#/experiments/645130807814833189

📘 Red Neuronal (100,50)
Matriz de Confusión:
 [[162233    113]
 [   173   5868]]
Reporte de Clasificación:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    162346
           1       0.98      0.97      0.98      6041

    accuracy                           1.00    168387
   macro avg       0.99      0.99      0.99    168387
weighted avg       1.00      1.00      1.00    168387





🏃 View run Red Neuronal (100,50) at: http://localhost:5000/#/experiments/645130807814833189/runs/eac86ebf1a3a4f5889e8de1c52b651d8
🧪 View experiment at: http://localhost:5000/#/experiments/645130807814833189

📘 Red Neuronal (128, 64, 32)
Matriz de Confusión:
 [[162187    159]
 [   129   5912]]
Reporte de Clasificación:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    162346
           1       0.97      0.98      0.98      6041

    accuracy                           1.00    168387
   macro avg       0.99      0.99      0.99    168387
weighted avg       1.00      1.00      1.00    168387





🏃 View run Red Neuronal (128, 64, 32) at: http://localhost:5000/#/experiments/645130807814833189/runs/d650cd2722484778b5df22c725331517
🧪 View experiment at: http://localhost:5000/#/experiments/645130807814833189


In [17]:
# Final model: its run is logged under a dedicated MLflow experiment on the
# same local tracking server.
mlflow.set_tracking_uri("http://localhost:5000")
experiment = mlflow.set_experiment("modelo_final")

# Single-entry mapping (display name -> estimator) so the final model can be
# iterated with the same name/estimator loop shape as the candidates.
modelo_final = {
    "Red Neuronal (128, 64, 32)": MLPClassifier(
        hidden_layer_sizes=(128, 64, 32),
        activation='relu',
        solver='adam',
        batch_size=256,
        max_iter=300,
        early_stopping=True,
        random_state=42,
    )
}

2025/05/25 12:30:33 INFO mlflow.tracking.fluent: Experiment with name 'modelo_final' does not exist. Creating a new experiment.


In [18]:
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
# Train the final model inside an MLflow run, print its test-set evaluation,
# and log the model name, metrics and fitted pipeline to MLflow.
for nombre_modelo, modelo in modelo_final.items():
    with mlflow.start_run(experiment_id=experiment.experiment_id, run_name=nombre_modelo):
        # Preprocessing lives inside the pipeline, so it is fitted on the
        # training split only and re-applied unchanged to the test split.
        pipeline = Pipeline(steps=[
            ('preprocesamiento', preprocesamiento),
            ('modelo', modelo)
        ])
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)

        acc = accuracy_score(y_test, y_pred)
        # Fix: the "precision" metric was previously computed with
        # accuracy_score, so MLflow stored a duplicate of the accuracy.
        # zero_division=0 mirrors the recall call below.
        precision = precision_score(y_test, y_pred, zero_division=0)
        recall = recall_score(y_test, y_pred, zero_division=0)
        cm = confusion_matrix(y_test, y_pred)
        reporte = classification_report(y_test, y_pred)

        print(f"\n📘 {nombre_modelo}")
        print("Matriz de Confusión:\n", cm)
        print("Reporte de Clasificación:\n", reporte)

        mlflow.log_param("modelo", nombre_modelo)
        mlflow.log_metric("accuracy", acc)
        mlflow.log_metric("precision", precision)
        mlflow.log_metric("recall", recall)
        # Artifact path is derived from the display name (spaces -> "_", lower-cased).
        mlflow.sklearn.log_model(pipeline, f"modelo_{nombre_modelo.replace(' ', '_').lower()}")


📘 Red Neuronal (128, 64, 32)
Matriz de Confusión:
 [[162187    159]
 [   129   5912]]
Reporte de Clasificación:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    162346
           1       0.97      0.98      0.98      6041

    accuracy                           1.00    168387
   macro avg       0.99      0.99      0.99    168387
weighted avg       1.00      1.00      1.00    168387





🏃 View run Red Neuronal (128, 64, 32) at: http://localhost:5000/#/experiments/991763802603381987/runs/c0b2e6561482421d977fa0da288c9aec
🧪 View experiment at: http://localhost:5000/#/experiments/991763802603381987
