In [12]:
# Cargar librerías
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score
import mlflow
import mlflow.sklearn


In [13]:
# Leer datos
df = pd.read_csv("data/diabetes.csv")



In [14]:
# identifica las columnas que tiene valores 0
(df==0).sum()

Pregnancies                 111
Glucose                       5
BloodPressure                35
SkinThickness               227
Insulin                     374
BMI                          11
DiabetesPedigreeFunction      0
Age                           0
Outcome                     500
dtype: int64

In [15]:
# Reemplazar ceros en columnas específicas por NaN
cols_to_clean = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin']
# No se consideran algunas, por ejemplo Pregnancies (por qué si es posible que se tengan cero embarazos)

# Reemplazar ceros por NaN para poder tratarlos como datos faltantes
for col in cols_to_clean:
    df[col] = df[col].replace(0, np.nan)

# Reemplazar NaN con la moda (valor más frecuente) de cada columna
for col in cols_to_clean:
    moda = df[col].mode() # buscar el método más común para determinar la moda de una columna
    df[col] = df[col].fillna(moda[0])


In [16]:
# Separar datos
X = df.drop("Outcome", axis=1)
y = df["Outcome"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [17]:
# agregar el traking uri de mlflow
mlflow.set_tracking_uri("http://127.0.0.1:9090")

# agregar set_experiment con nombre: ClasificadorDemoDiabetes
mlflow.set_experiment("ClasificadorDemoDiabetes")


<Experiment: artifact_location='mlflow-artifacts:/1', creation_time=1747891161949, experiment_id='1', last_update_time=1747891161949, lifecycle_stage='active', name='ClasificadorDemoDiabetes', tags={}>

In [18]:
data2 = pd.read_csv("data/logreg_variaciones_educativas.csv")
parametros = data2[["logreg_C", "logreg_max_iter", "solver", "penalty"]]
print(parametros)

   logreg_C  logreg_max_iter     solver penalty
0      0.01              400  liblinear      l2
1      1.12              300      lbfgs      l2
2      2.23              500  liblinear      l2
3      3.34              200  liblinear      l2
4      4.45              300  liblinear      l2
5      5.56              500      lbfgs      l2
6      6.67              300      lbfgs      l2
7      7.78              500      lbfgs      l2
8      8.89              400      lbfgs      l2
9     10.00              200      lbfgs      l2


In [24]:
# Entrenamiento y registro con MLflow
C = 1.0
max_iter = 1000
# usar los parámetros del archivo logreg_variaciones_educativas.csv
# para los parámtros C, max_iter, solver y penalty de LogisticRegresion
# se debe genear un run por cada fila del archivo, usando sus parámetros
# Analizar que usar
# El pipeline usar StandarScaler para que todos los valore numéricos estén en la misma escala
    # no cambiar dicha línea
    # LogisticRegresion deben variar sus parámetros en cada iteración.

In [25]:
# Cerrar cualquier run activo antes de iniciar
if mlflow.active_run():
    mlflow.end_run()

for idx, row in data2.iterrows():
    with mlflow.start_run(run_name=f"LR_Run_Practica_0_Byron_Bravo_{idx+1}"):
        # Crear pipeline con parámetros de la fila
        pipeline = Pipeline([
            ("scaler", StandardScaler()),
            ("clf", LogisticRegression(
                C=row["logreg_C"],
                max_iter=int(row["logreg_max_iter"]),
                solver=row["solver"],
                penalty=row["penalty"]
            ))
        ])

        # Entrenar
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)

        # Métricas
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred)

        # Log de parámetros y métricas
        mlflow.log_param("logreg_C", row["logreg_C"])
        mlflow.log_param("logreg_max_iter", row["logreg_max_iter"])
        mlflow.log_param("solver", row["solver"])
        mlflow.log_param("penalty", row["penalty"])
        mlflow.log_metric("accuracy", acc)
        mlflow.log_metric("precision", prec)

        # Guardar el modelo
        mlflow.sklearn.log_model(pipeline, "modelo_pipeline")

        print(f"✅ Run {idx+1} completado: Accuracy={acc:.4f} | Precision={prec:.4f}")


🏃 View run gentle-shrike-960 at: http://127.0.0.1:9090/#/experiments/1/runs/a97d788479a7446d8a1ea45bdd895472
🧪 View experiment at: http://127.0.0.1:9090/#/experiments/1




✅ Run 1 completado: Accuracy=0.7208 | Precision=0.6034
🏃 View run LR_Run_Practica_0_Byron_Bravo_1 at: http://127.0.0.1:9090/#/experiments/1/runs/34c22570c2dc4b8891ecb4daf7bbb22c
🧪 View experiment at: http://127.0.0.1:9090/#/experiments/1




✅ Run 2 completado: Accuracy=0.7532 | Precision=0.6667
🏃 View run LR_Run_Practica_0_Byron_Bravo_2 at: http://127.0.0.1:9090/#/experiments/1/runs/931fef0389914aae907c10023bd9a30c
🧪 View experiment at: http://127.0.0.1:9090/#/experiments/1




✅ Run 3 completado: Accuracy=0.7532 | Precision=0.6667
🏃 View run LR_Run_Practica_0_Byron_Bravo_3 at: http://127.0.0.1:9090/#/experiments/1/runs/c2c6edb51dd147aeb7e07eefc9e1c41b
🧪 View experiment at: http://127.0.0.1:9090/#/experiments/1




✅ Run 4 completado: Accuracy=0.7532 | Precision=0.6667
🏃 View run LR_Run_Practica_0_Byron_Bravo_4 at: http://127.0.0.1:9090/#/experiments/1/runs/cc74e19c4ef84eb8ba747e8a640a5a78
🧪 View experiment at: http://127.0.0.1:9090/#/experiments/1




✅ Run 5 completado: Accuracy=0.7532 | Precision=0.6667
🏃 View run LR_Run_Practica_0_Byron_Bravo_5 at: http://127.0.0.1:9090/#/experiments/1/runs/2b519899a32048d79d079dec2a01ebc7
🧪 View experiment at: http://127.0.0.1:9090/#/experiments/1




✅ Run 6 completado: Accuracy=0.7532 | Precision=0.6667
🏃 View run LR_Run_Practica_0_Byron_Bravo_6 at: http://127.0.0.1:9090/#/experiments/1/runs/fb9c0e94a3d340b8bab092a3993df02c
🧪 View experiment at: http://127.0.0.1:9090/#/experiments/1




✅ Run 7 completado: Accuracy=0.7532 | Precision=0.6667
🏃 View run LR_Run_Practica_0_Byron_Bravo_7 at: http://127.0.0.1:9090/#/experiments/1/runs/fa8b05c171e445e18c57432d6f235adc
🧪 View experiment at: http://127.0.0.1:9090/#/experiments/1




✅ Run 8 completado: Accuracy=0.7532 | Precision=0.6667
🏃 View run LR_Run_Practica_0_Byron_Bravo_8 at: http://127.0.0.1:9090/#/experiments/1/runs/2add935e24bb4f87954ca45b594613d7
🧪 View experiment at: http://127.0.0.1:9090/#/experiments/1




✅ Run 9 completado: Accuracy=0.7532 | Precision=0.6667
🏃 View run LR_Run_Practica_0_Byron_Bravo_9 at: http://127.0.0.1:9090/#/experiments/1/runs/e338fac14d434e3d9c9d80d04d09b75e
🧪 View experiment at: http://127.0.0.1:9090/#/experiments/1




✅ Run 10 completado: Accuracy=0.7532 | Precision=0.6667
🏃 View run LR_Run_Practica_0_Byron_Bravo_10 at: http://127.0.0.1:9090/#/experiments/1/runs/f5366bff17664034a6e0f5fe1878718e
🧪 View experiment at: http://127.0.0.1:9090/#/experiments/1
