In [1]:
# Cargar librerías
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score
import mlflow
import mlflow.sklearn


In [3]:
# Leer datos
df = pd.read_csv("diabetes.csv")



In [4]:
# identifica las columnas que tiene valores 0
(df==0).sum()

Pregnancies                 111
Glucose                       5
BloodPressure                35
SkinThickness               227
Insulin                     374
BMI                          11
DiabetesPedigreeFunction      0
Age                           0
Outcome                     500
dtype: int64

In [5]:
# Reemplazar ceros en columnas específicas por NaN
cols_to_clean = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin']
# No se consideran algunas, por ejemplo Pregnancies (por qué si es posible que se tengan cero embarazos)

# Reemplazar ceros por NaN para poder tratarlos como datos faltantes
for col in cols_to_clean:
    df[col] = df[col].replace(0, np.nan)

# Reemplazar NaN con la moda (valor más frecuente) de cada columna
for col in cols_to_clean:
    moda = df[col].mode()[0]# buscar el método más común para determinar la moda de una columna
    df[col] = df[col].fillna(moda)


In [6]:
# Separar datos
X = df.drop("Outcome", axis=1)
y = df["Outcome"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [7]:
# agregar el traking uri de mlflow
mlflow.set_tracking_uri("http://localhost:9090")
# agregar set_experiment con nombre: ClasificadorDemoDiabetes
mlflow.set_experiment("ClasificadorDemoDiabetes")

2025/05/16 20:03:09 INFO mlflow.tracking.fluent: Experiment with name 'ClasificadorDemoDiabetes' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/1', creation_time=1747443788946, experiment_id='1', last_update_time=1747443788946, lifecycle_stage='active', name='ClasificadorDemoDiabetes', tags={}>

In [12]:
# Entrenamiento y registro con MLflow
#C = 1.0
#max_iter = 1000
# usar los parámetros del archivo logreg_variaciones_educativas.csv
# para los parámtros C, max_iter, solver y penalty de LogisticRegresion
# se debe genear un run por cada fila del archivo, usando sus parámetros
# Analizar que usar

dfparametros = pd.read_csv("logreg_variaciones_educativas.csv")

for idx, row in dfparametros.iterrows():
    C = row['logreg_C']
    max_iter = row['logreg_max_iter']
    solver = row['solver']
    penalty = row['penalty']

# agregar la línea del start_run
    with mlflow.start_run():
    # El pipeline usar StandarScaler para que todos los valore numéricos estén en la misma escala
    # no cambiar dicha línea
    # LogisticRegresion deben variar sus parámetros en cada iteración.
        pipeline = Pipeline([
                ("scaler", StandardScaler()),
                ("clf", LogisticRegression(
                    C=C,
                    max_iter=max_iter,
                    solver=solver,
                    penalty=penalty
                ))
            ])
    
    # Entrenar y evaluar
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)

        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred)

    # recuerde que log_param y log_metric debe ir guardando los valores
    # dependiendo del run correspondiente de cada fila
        mlflow.log_param("logreg_C", C)
        mlflow.log_param("logreg_max_iter", max_iter)
        mlflow.log_param("solver", "liblinear")
        mlflow.log_param("penalty", "l2")
    
        mlflow.log_metric("accuracy", acc)
        mlflow.log_metric("precision", prec)
    
    # Guardar el pipeline completo
        mlflow.sklearn.log_model(pipeline, "modelo_pipeline")
    
        print(" Modelo registrado en MLflow")
        print(f"Accuracy: {acc:.4f} | Precision: {prec:.4f}")




 Modelo registrado en MLflow
Accuracy: 0.7208 | Precision: 0.6034
🏃 View run kindly-gnat-626 at: http://localhost:9090/#/experiments/1/runs/a0fe1213c7e44dc39fbdcb1eebda48b3
🧪 View experiment at: http://localhost:9090/#/experiments/1




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run polite-turtle-967 at: http://localhost:9090/#/experiments/1/runs/69bb7d3d60c0470284c7abe55797030e
🧪 View experiment at: http://localhost:9090/#/experiments/1




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run valuable-vole-419 at: http://localhost:9090/#/experiments/1/runs/ada97494ac804399bc817da9ac29b5e4
🧪 View experiment at: http://localhost:9090/#/experiments/1




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run painted-rook-433 at: http://localhost:9090/#/experiments/1/runs/e98a8ba4cb8342cea636469325758062
🧪 View experiment at: http://localhost:9090/#/experiments/1




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run serious-jay-108 at: http://localhost:9090/#/experiments/1/runs/83d62a1e5e8c47aab69ef8dc849f3f31
🧪 View experiment at: http://localhost:9090/#/experiments/1




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run mercurial-wren-655 at: http://localhost:9090/#/experiments/1/runs/b55a5169d5bb41d69d3106b80fa51207
🧪 View experiment at: http://localhost:9090/#/experiments/1




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run placid-hare-191 at: http://localhost:9090/#/experiments/1/runs/49610d80668c4fcb9776f25f902e06b0
🧪 View experiment at: http://localhost:9090/#/experiments/1




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run dapper-duck-658 at: http://localhost:9090/#/experiments/1/runs/f9dba4eb2c454e39af522eadc8aca9bb
🧪 View experiment at: http://localhost:9090/#/experiments/1




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run rumbling-gnu-143 at: http://localhost:9090/#/experiments/1/runs/e6287a132e484c70bd8c2c40d95ee8e6
🧪 View experiment at: http://localhost:9090/#/experiments/1




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run funny-shrike-326 at: http://localhost:9090/#/experiments/1/runs/c66fe6e530b34ba3841147f82f3cad26
🧪 View experiment at: http://localhost:9090/#/experiments/1
