In [17]:
# Load libraries
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score
import mlflow
import mlflow.sklearn


In [18]:
# Load the diabetes dataset from the project's data directory
df = pd.read_csv(filepath_or_buffer="data/diabetes.csv")



In [19]:
# Count, per column, how many entries are exactly 0
df.eq(0).sum()

Pregnancies                 111
Glucose                       5
BloodPressure                35
SkinThickness               227
Insulin                     374
BMI                          11
DiabetesPedigreeFunction      0
Age                           0
Outcome                     500
dtype: int64

In [25]:
# Columns in which a 0 is treated as a missing measurement.
# Some columns are deliberately left out, e.g. Pregnancies, where zero is a
# legitimate value.
# NOTE(review): the zero-count output above also reports zeros in BMI, which is
# not cleaned here — confirm whether that omission is intentional.
cols_to_clean = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin']

# For each column: turn zeros into NaN so they count as missing, then fill the
# gaps with the column's mode (its most frequent value; the first one on ties).
for col in cols_to_clean:
    with_gaps = df[col].replace(0, np.nan)
    most_frequent = with_gaps.mode()[0]
    df[col] = with_gaps.fillna(most_frequent)


In [26]:
# Separate the target column from the feature columns
y = df["Outcome"]
X = df.drop(columns=["Outcome"])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [27]:
# Point MLflow at the tracking server used for this exercise
mlflow.set_tracking_uri(uri="http://localhost:9090")

# Select the experiment named ClasificadorDemoDiabetes; kept as the cell's last
# expression so the returned Experiment object is displayed
mlflow.set_experiment("ClasificadorDemoDiabetes")


<Experiment: artifact_location='mlflow-artifacts:/138666968949628617', creation_time=1747442119650, experiment_id='138666968949628617', last_update_time=1747442119650, lifecycle_stage='active', name='ClasificadorDemoDiabetes', tags={}>

In [28]:
# Load the table of hyperparameter variations (one row per planned run)
data2 = pd.read_csv(filepath_or_buffer="data/logreg_variaciones_educativas.csv")

# Keep only the columns that configure LogisticRegression and show them
columnas_parametros = ["logreg_C", "logreg_max_iter", "solver", "penalty"]
parametros = data2.loc[:, columnas_parametros]
print(parametros)

   logreg_C  logreg_max_iter     solver penalty
0      0.01              400  liblinear      l2
1      1.12              300      lbfgs      l2
2      2.23              500  liblinear      l2
3      3.34              200  liblinear      l2
4      4.45              300  liblinear      l2
5      5.56              500      lbfgs      l2
6      6.67              300      lbfgs      l2
7      7.78              500      lbfgs      l2
8      8.89              400      lbfgs      l2
9     10.00              200      lbfgs      l2


In [30]:
# Training and MLflow logging.
#
# One MLflow run is created per row of data2 (read from
# logreg_variaciones_educativas.csv). Each row supplies the C, max_iter, solver
# and penalty values for LogisticRegression in that run.
#
# The pipeline applies StandardScaler first so that all numeric features are on
# the same scale before the classifier sees them.
for idx, row in data2.iterrows():
    # Extract this row's hyperparameters once, so the values passed to the
    # estimator and the values logged to MLflow can never disagree.
    # (Previously fixed placeholder values were logged for every run instead of
    # the values actually used to fit the model.)
    c_value = float(row["logreg_C"])
    n_iter = int(row["logreg_max_iter"])
    solver_name = row["solver"]
    penalty_name = row["penalty"]

    with mlflow.start_run(run_name=f"practica_0{idx+1}"):
        pipeline = Pipeline([
            ("scaler", StandardScaler()),
            ("clf", LogisticRegression(
                C=c_value,
                max_iter=n_iter,
                solver=solver_name,
                penalty=penalty_name
            ))
        ])

        # Fit on the training split and evaluate on the held-out split
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)

        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred)

        # Record the hyperparameters that were really used for this run
        mlflow.log_param("logreg_C", c_value)
        mlflow.log_param("logreg_max_iter", n_iter)
        mlflow.log_param("solver", solver_name)
        mlflow.log_param("penalty", penalty_name)

        # Record the evaluation metrics for this run
        mlflow.log_metric("accuracy", acc)
        mlflow.log_metric("precision", prec)

        # Store the whole fitted pipeline (scaler + classifier) as the run's model
        mlflow.sklearn.log_model(pipeline, "modelo_pipeline")

        print(" Modelo registrado en MLflow")
        print(f"Accuracy: {acc:.4f} | Precision: {prec:.4f}")


2025/05/16 20:30:12 INFO mlflow.tracking._tracking_service.client: üèÉ View run practica_01 at: http://localhost:9090/#/experiments/138666968949628617/runs/a0cc2a4946534c22a3f6cd29c3c929fc.
2025/05/16 20:30:12 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://localhost:9090/#/experiments/138666968949628617.


 Modelo registrado en MLflow
Accuracy: 0.7208 | Precision: 0.6034


2025/05/16 20:30:14 INFO mlflow.tracking._tracking_service.client: üèÉ View run practica_02 at: http://localhost:9090/#/experiments/138666968949628617/runs/83e994d45aed414f8d0923e910f6ff1a.
2025/05/16 20:30:14 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://localhost:9090/#/experiments/138666968949628617.


 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667


2025/05/16 20:30:17 INFO mlflow.tracking._tracking_service.client: üèÉ View run practica_03 at: http://localhost:9090/#/experiments/138666968949628617/runs/c96f1eb409c84c7c8cd6500bddca4537.
2025/05/16 20:30:17 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://localhost:9090/#/experiments/138666968949628617.


 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667


2025/05/16 20:30:19 INFO mlflow.tracking._tracking_service.client: üèÉ View run practica_04 at: http://localhost:9090/#/experiments/138666968949628617/runs/1f3e9ea6a76c4eb287920bef48fbf528.
2025/05/16 20:30:19 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://localhost:9090/#/experiments/138666968949628617.


 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667


2025/05/16 20:30:22 INFO mlflow.tracking._tracking_service.client: üèÉ View run practica_05 at: http://localhost:9090/#/experiments/138666968949628617/runs/299a9bb2fa614710b825e87e647a1732.
2025/05/16 20:30:22 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://localhost:9090/#/experiments/138666968949628617.


 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667


2025/05/16 20:30:26 INFO mlflow.tracking._tracking_service.client: üèÉ View run practica_06 at: http://localhost:9090/#/experiments/138666968949628617/runs/5e41f778c47c4fbd975742f91f9b650b.
2025/05/16 20:30:26 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://localhost:9090/#/experiments/138666968949628617.


 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667


2025/05/16 20:30:29 INFO mlflow.tracking._tracking_service.client: üèÉ View run practica_07 at: http://localhost:9090/#/experiments/138666968949628617/runs/3245839c98f94a679ac9e419b981eb7c.
2025/05/16 20:30:29 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://localhost:9090/#/experiments/138666968949628617.


 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667


2025/05/16 20:30:31 INFO mlflow.tracking._tracking_service.client: üèÉ View run practica_08 at: http://localhost:9090/#/experiments/138666968949628617/runs/1c7391cf77f64a56b2aa1b5d5ee0336b.
2025/05/16 20:30:31 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://localhost:9090/#/experiments/138666968949628617.


 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667


2025/05/16 20:30:34 INFO mlflow.tracking._tracking_service.client: üèÉ View run practica_09 at: http://localhost:9090/#/experiments/138666968949628617/runs/7389f774ada04da9ab55272386f081e2.
2025/05/16 20:30:34 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://localhost:9090/#/experiments/138666968949628617.


 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667


2025/05/16 20:30:37 INFO mlflow.tracking._tracking_service.client: üèÉ View run practica_010 at: http://localhost:9090/#/experiments/138666968949628617/runs/e72d419156cc4f8da22bf89c285735e4.
2025/05/16 20:30:37 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://localhost:9090/#/experiments/138666968949628617.


 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
