In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import mlflow
import mlflow.sklearn

# Load the raw dataset from the Excel workbook next to the notebook
df = pd.read_excel(io="datos_utiles.xlsx")


In [2]:
# Target column first, then every remaining column as the predictors
y = df.loc[:, "es_elegible_jovenes_u"]
X = df.drop("es_elegible_jovenes_u", axis="columns")

In [6]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split

# Split the frame into the target series and the predictor matrix
y = df["es_elegible_jovenes_u"]
X = df.drop(columns=["es_elegible_jovenes_u"])

# Group predictor columns by dtype. Only int64/float64 and object columns are
# picked up; a column of any other dtype lands in neither list, and the
# ColumnTransformer below (default remainder='drop') leaves it out.
categoricas = X.select_dtypes(include=['object']).columns.to_list()
numericas = X.select_dtypes(include=['int64', 'float64']).columns.to_list()

# Preprocessing: standardise numeric columns, one-hot encode categorical ones.
# handle_unknown='ignore' keeps transform from failing on unseen categories.
preprocesamiento = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numericas),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categoricas),
    ]
)

# 70/30 train/test split, stratified on the target and seeded for repeatability
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

In [7]:
# MLflow setup: point the client at the local tracking server and select
# the experiment that the runs below are recorded under
mlflow.set_tracking_uri(uri="http://localhost:5000")
experiment = mlflow.set_experiment(experiment_name="becas_elegibilidad_modelos")

In [10]:
# Candidate classifiers, keyed by the display name used for the MLflow run.
# The random forest and the neural network are stochastic, so they are seeded
# with the same value as the train/test split to make the comparison
# repeatable across kernel restarts.
modelos = {
    "Regresión Logística": LogisticRegression(max_iter=500),
    "Random Forest": RandomForestClassifier(n_estimators=200, random_state=42),
    "Red Neuronal": MLPClassifier(
        hidden_layer_sizes=(100, 50),
        max_iter=100,
        batch_size=512,
        random_state=42,
    ),
}

In [12]:
from sklearn.pipeline import Pipeline

# Train, evaluate and track every candidate model, one MLflow run per model.
for nombre_modelo, modelo in modelos.items():
    with mlflow.start_run(experiment_id=experiment.experiment_id, run_name=nombre_modelo):
        # The preprocessing step lives inside the pipeline, so it is fitted
        # on the training split only and reapplied as-is at predict time.
        pipeline = Pipeline(steps=[
            ('preprocesamiento', preprocesamiento),
            ('modelo', modelo)
        ])
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)

        acc = accuracy_score(y_test, y_pred)
        cm = confusion_matrix(y_test, y_pred)
        reporte = classification_report(y_test, y_pred)
        # Same report as a dict, used to pull out the class-balanced averages.
        macro = classification_report(y_test, y_pred, output_dict=True)["macro avg"]

        print(f"\n📘 {nombre_modelo}")
        print("Matriz de Confusión:\n", cm)
        print("Reporte de Clasificación:\n", reporte)

        mlflow.log_param("modelo", nombre_modelo)
        mlflow.log_metric("accuracy", acc)
        # Accuracy alone is not informative here: in the recorded run the test
        # split holds 162,973 negatives vs 5,414 positives, so always
        # predicting 0 would already score ~0.97. Track macro-averaged
        # precision/recall/F1 so the runs can be compared on both classes.
        mlflow.log_metrics({
            "precision_macro": macro["precision"],
            "recall_macro": macro["recall"],
            "f1_macro": macro["f1-score"],
        })
        # Keep the full evaluation with the run instead of only in cell output.
        mlflow.log_text(reporte, "classification_report.txt")
        mlflow.log_text(str(cm), "confusion_matrix.txt")
        # NOTE(review): all three models score ~1.00 in the recorded output;
        # worth confirming no predictor encodes the target (possible leakage).
        mlflow.sklearn.log_model(pipeline, f"modelo_{nombre_modelo.replace(' ', '_').lower()}")


📘 Regresión Logística
Matriz de Confusión:
 [[162877     96]
 [     0   5414]]
Reporte de Clasificación:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    162973
           1       0.98      1.00      0.99      5414

    accuracy                           1.00    168387
   macro avg       0.99      1.00      1.00    168387
weighted avg       1.00      1.00      1.00    168387





🏃 View run Regresión Logística at: http://localhost:5000/#/experiments/645130807814833189/runs/b2817a057122458f9a681f93ed311717
🧪 View experiment at: http://localhost:5000/#/experiments/645130807814833189

📘 Random Forest
Matriz de Confusión:
 [[162971      2]
 [     0   5414]]
Reporte de Clasificación:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    162973
           1       1.00      1.00      1.00      5414

    accuracy                           1.00    168387
   macro avg       1.00      1.00      1.00    168387
weighted avg       1.00      1.00      1.00    168387





🏃 View run Random Forest at: http://localhost:5000/#/experiments/645130807814833189/runs/2811fcd9bc8948c79015de9cda955553
🧪 View experiment at: http://localhost:5000/#/experiments/645130807814833189

📘 Red Neuronal
Matriz de Confusión:
 [[162973      0]
 [     3   5411]]
Reporte de Clasificación:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    162973
           1       1.00      1.00      1.00      5414

    accuracy                           1.00    168387
   macro avg       1.00      1.00      1.00    168387
weighted avg       1.00      1.00      1.00    168387





🏃 View run Red Neuronal at: http://localhost:5000/#/experiments/645130807814833189/runs/efc642547bcc4bb8b12d3bda4fbf5f82
🧪 View experiment at: http://localhost:5000/#/experiments/645130807814833189
