In [1]:
# Cargar librerías
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score
import mlflow
import mlflow.sklearn


In [3]:
# Leer datos
df = pd.read_csv("data/diabetes.csv")



In [4]:
# identifica las columnas que tiene valores 0
(df==0).sum()

Pregnancies                 111
Glucose                       5
BloodPressure                35
SkinThickness               227
Insulin                     374
BMI                          11
DiabetesPedigreeFunction      0
Age                           0
Outcome                     500
dtype: int64

In [5]:
# Reemplazar ceros en columnas específicas por NaN
cols_to_clean = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin']
# No se consideran algunas, por ejemplo Pregnancies (por qué si es posible que se tengan cero embarazos)

# Reemplazar ceros por NaN para poder tratarlos como datos faltantes
for col in cols_to_clean:
    df[col] = df[col].replace(0, np.nan)

# Reemplazar NaN con la moda (valor más frecuente) de cada columna
for col in cols_to_clean:
    # buscar la moda
    moda = df[col].mode()[0]
    df[col] = df[col].fillna(moda)


In [6]:
# Separar datos
X = df.drop("Outcome", axis=1)
y = df["Outcome"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [7]:
# agregar el traking uri de mlflow
mlflow.set_tracking_uri("http://127.0.0.1:9090")
mlflow.set_experiment(experiment_name="ClasificadorDemoDiabetes")
# agregar set_experiment con nombre: ClasificadorDemoDiabetes


2025/05/16 20:05:17 INFO mlflow.tracking.fluent: Experiment with name 'ClasificadorDemoDiabetes' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/1', creation_time=1747443917057, experiment_id='1', last_update_time=1747443917057, lifecycle_stage='active', name='ClasificadorDemoDiabetes', tags={}>

In [8]:
parametros_df = pd.read_csv("data/logreg_variaciones_educativas.csv")


In [12]:
#prueba para ver si funciona el iterador
for idx, fila in parametros_df.iterrows():
    print(fila["logreg_C"])
    print(fila["logreg_max_iter"])
    print(fila["solver"])
    print(fila["penalty"])

0.01
400
liblinear
l2
1.12
300
lbfgs
l2
2.23
500
liblinear
l2
3.34
200
liblinear
l2
4.45
300
liblinear
l2
5.5600000000000005
500
lbfgs
l2
6.67
300
lbfgs
l2
7.78
500
lbfgs
l2
8.89
400
lbfgs
l2
10.0
200
lbfgs
l2


In [16]:
for idx, fila in parametros_df.iterrows():
    with mlflow.start_run():
        # El pipeline usar StandarScaler para que todos los valore numéricos estén en la misma escala
        # no cambiar dicha línea
        # LogisticRegresion deben variar sus parámetros en cada iteración.
        pipeline = Pipeline([
                ("scaler", StandardScaler()),
                ("clf", LogisticRegression(
                    C=fila["logreg_C"],
                    max_iter=fila["logreg_max_iter"],
                    solver=fila["solver"],
                    penalty=fila["penalty"]
                ))
            ])
        
        # Entrenar y evaluar
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)
    
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred)
    
        # recuerde que log_param y log_metric debe ir guardando los valores
        # dependiendo del run correspondiente de cada fila
        mlflow.log_param("logreg_C", fila["logreg_C"])
        mlflow.log_param("logreg_max_iter", fila["logreg_max_iter"])
        mlflow.log_param("solver", fila["solver"])
        mlflow.log_param("penalty", fila["penalty"])
        
        mlflow.log_metric("accuracy", acc)
        mlflow.log_metric("precision", prec)
        
        # Guardar el pipeline completo
        mlflow.sklearn.log_model(pipeline, "modelo_pipeline")
        
        print(" Modelo registrado en MLflow")
        print(f"Accuracy: {acc:.4f} | Precision: {prec:.4f}")




 Modelo registrado en MLflow
Accuracy: 0.7208 | Precision: 0.6034
🏃 View run fun-mink-420 at: http://127.0.0.1:9090/#/experiments/1/runs/72d51c33101d48d19cd9cb2be9c5629b
🧪 View experiment at: http://127.0.0.1:9090/#/experiments/1




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run casual-hen-40 at: http://127.0.0.1:9090/#/experiments/1/runs/acd7baefb4e54461bfd82866a24c2da1
🧪 View experiment at: http://127.0.0.1:9090/#/experiments/1




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run auspicious-foal-275 at: http://127.0.0.1:9090/#/experiments/1/runs/08a4ba47b2f442ffa7e750d62ee9a06d
🧪 View experiment at: http://127.0.0.1:9090/#/experiments/1




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run judicious-dove-243 at: http://127.0.0.1:9090/#/experiments/1/runs/212dea993666480791df1a20581222cf
🧪 View experiment at: http://127.0.0.1:9090/#/experiments/1




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run zealous-newt-451 at: http://127.0.0.1:9090/#/experiments/1/runs/f026352a0aa94788b26ef181caec658d
🧪 View experiment at: http://127.0.0.1:9090/#/experiments/1




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run zealous-snipe-734 at: http://127.0.0.1:9090/#/experiments/1/runs/65e4d2f0cb184c3fb9ec93bd60150570
🧪 View experiment at: http://127.0.0.1:9090/#/experiments/1




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run capable-worm-667 at: http://127.0.0.1:9090/#/experiments/1/runs/e84c0f8d97474cb6baf8b87b5ca7b35e
🧪 View experiment at: http://127.0.0.1:9090/#/experiments/1




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run nervous-grouse-684 at: http://127.0.0.1:9090/#/experiments/1/runs/6ea5d293eb374bf48da14b7cb7913d12
🧪 View experiment at: http://127.0.0.1:9090/#/experiments/1




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run handsome-moth-216 at: http://127.0.0.1:9090/#/experiments/1/runs/5cd870d8b12f435f88a09e8b85407fc9
🧪 View experiment at: http://127.0.0.1:9090/#/experiments/1




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run gentle-shrike-465 at: http://127.0.0.1:9090/#/experiments/1/runs/61d9fabb94ce4a8d93ad8c386d05acd5
🧪 View experiment at: http://127.0.0.1:9090/#/experiments/1
