In [6]:
#!pip install scikit-learn
#!pip install pandas
!pip install mlflow

Collecting mlflow
  Obtaining dependency information for mlflow from https://files.pythonhosted.org/packages/28/8f/b3bdbaf3f060801e5d8b7b7f3b67c79e3d12939c67002f9a618150317a29/mlflow-2.22.0-py3-none-any.whl.metadata
  Downloading mlflow-2.22.0-py3-none-any.whl.metadata (30 kB)
Collecting mlflow-skinny==2.22.0 (from mlflow)
  Obtaining dependency information for mlflow-skinny==2.22.0 from https://files.pythonhosted.org/packages/f4/eb/53dd2a5db1040a21da2980c382ebe3a9bda2d8af8365c2d01053c924b150/mlflow_skinny-2.22.0-py3-none-any.whl.metadata
  Downloading mlflow_skinny-2.22.0-py3-none-any.whl.metadata (31 kB)
Collecting Flask<4 (from mlflow)
  Obtaining dependency information for Flask<4 from https://files.pythonhosted.org/packages/3d/68/9d4508e893976286d2ead7f8f571314af6c2037af34853a30fd769c02e9d/flask-3.1.1-py3-none-any.whl.metadata
  Downloading flask-3.1.1-py3-none-any.whl.metadata (3.0 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Obtaining dependency information for alembic!=1.1


[notice] A new release of pip is available: 23.2.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [7]:
# Cargar librerías
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score
import mlflow
import mlflow.sklearn

In [9]:
# Load the diabetes dataset from the CSV file (path is relative to the notebook's working directory)
df = pd.read_csv("data/diabetes.csv")

In [10]:
# Count, for every column, how many entries are exactly 0
(df==0).sum()

Pregnancies                 111
Glucose                       5
BloodPressure                35
SkinThickness               227
Insulin                     374
BMI                          11
DiabetesPedigreeFunction      0
Age                           0
Outcome                     500
dtype: int64

In [13]:
# Columns in which a 0 is treated as a missing measurement.
# Other columns are deliberately left out, e.g. Pregnancies, where zero is a
# legitimate value.
# NOTE(review): BMI also shows zeros in the per-column zero count and is not
# listed here -- confirm whether that omission is intentional.
cols_to_clean = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin']

# Each column is handled independently, so both steps run in a single pass:
#   1. turn zeros into NaN so they count as missing data,
#   2. fill every NaN with the column's mode (its most frequent value).
for col in cols_to_clean:
    without_zeros = df[col].replace(0, np.nan)
    moda = without_zeros.mode()[0]  # first entry when several values tie
    df[col] = without_zeros.fillna(moda)



In [14]:
# Split the frame into target (Outcome) and features (every other column)
y = df["Outcome"]
X = df.drop(columns="Outcome")

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [17]:
# Point MLflow at the tracking server listening on localhost:9090
mlflow.set_tracking_uri("http://localhost:9090/")
# Select the experiment named ClasificadorDemoDiabetes
# (the captured log shows MLflow creating it when it does not exist yet)
mlflow.set_experiment(experiment_name="ClasificadorDemoDiabetes")

2025/05/21 21:37:30 INFO mlflow.tracking.fluent: Experiment with name 'ClasificadorDemoDiabetes' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/1', creation_time=1747881450937, experiment_id='1', last_update_time=1747881450937, lifecycle_stage='active', name='ClasificadorDemoDiabetes', tags={}>

In [22]:
# Load the CSV with the logistic-regression hyperparameter variations
# and print its column names as a quick sanity check
param_df = pd.read_csv("data/logreg_variaciones_educativas.csv")
print("Columnas del archivo:", list(param_df.columns))

Columnas del archivo: ['run_id', 'logreg_C', 'logreg_max_iter', 'solver', 'penalty']


In [27]:
import pandas as pd
import mlflow
import mlflow.sklearn
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score

# Load the hyperparameter variations from the CSV file.
# Every row describes one configuration; each is trained, evaluated and
# logged as its own MLflow run.
param_df = pd.read_csv("data/logreg_variaciones_educativas.csv")

for idx, row in param_df.iterrows():
    C = float(row["logreg_C"])
    max_iter = int(row["logreg_max_iter"])
    solver = row["solver"]
    penalty = row["penalty"]
    # The CSV column is called "run_id", but it is only a human-readable label
    # used as the run *name*; it is kept apart from MLflow's own run id below.
    run_name = row["run_id"]

    with mlflow.start_run(run_name=run_name) as run:
        pipeline = Pipeline([
            ("scaler", StandardScaler()),
            ("clf", LogisticRegression(
                C=C,
                max_iter=max_iter,
                solver=solver,
                penalty=penalty,
                # Same seed as the train/test split, so solvers that shuffle
                # the data give repeatable metrics across re-runs.
                random_state=42,
            )),
        ])

        # Train on the training split and evaluate on the held-out split
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)

        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred)

        # Log the hyperparameters of this configuration in one call
        mlflow.log_params({
            "logreg_C": C,
            "logreg_max_iter": max_iter,
            "solver": solver,
            "penalty": penalty,
        })

        # Log the evaluation metrics in one call
        mlflow.log_metrics({"accuracy": acc, "precision": prec})

        # Store the fitted pipeline as an artifact of this run
        mlflow.sklearn.log_model(pipeline, "modelo_pipeline")

        # MLflow-generated identifier of the run opened by this `with` block
        run_id = run.info.run_id

        # Register the logged pipeline in the Model Registry under 'model1'.
        # As the captured log shows, an existing 'model1' receives a new
        # version, so every row (and every re-execution) adds one version.
        result = mlflow.register_model(
            model_uri=f"runs:/{run_id}/modelo_pipeline",
            name="model1"
        )

        print(f"[Run {idx+1}] Modelo registrado en MLflow")
        print(f"Accuracy: {acc:.4f} | Precision: {prec:.4f}")

Registered model 'model1' already exists. Creating a new version of this model...
2025/05/21 21:56:03 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: model1, version 11
Created version '11' of model 'model1'.


[Run 1] Modelo registrado en MLflow
Accuracy: 0.7208 | Precision: 0.6034
🏃 View run practica_01 at: http://localhost:9090/#/experiments/1/runs/cbbc4c020f4046a597a167a815631ebc
🧪 View experiment at: http://localhost:9090/#/experiments/1


Registered model 'model1' already exists. Creating a new version of this model...
2025/05/21 21:56:06 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: model1, version 12
Created version '12' of model 'model1'.


[Run 2] Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run practica_02 at: http://localhost:9090/#/experiments/1/runs/6f05d7b9ffea4fc68d89cce5d206ea85
🧪 View experiment at: http://localhost:9090/#/experiments/1


Registered model 'model1' already exists. Creating a new version of this model...
2025/05/21 21:56:08 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: model1, version 13
Created version '13' of model 'model1'.


[Run 3] Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run practica_03 at: http://localhost:9090/#/experiments/1/runs/01d695e981d949fb812ffe681f04f062
🧪 View experiment at: http://localhost:9090/#/experiments/1


Registered model 'model1' already exists. Creating a new version of this model...
2025/05/21 21:56:11 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: model1, version 14
Created version '14' of model 'model1'.


[Run 4] Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run practica_04 at: http://localhost:9090/#/experiments/1/runs/7331294ded1a42498dadb8e2573850c9
🧪 View experiment at: http://localhost:9090/#/experiments/1


Registered model 'model1' already exists. Creating a new version of this model...
2025/05/21 21:56:14 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: model1, version 15
Created version '15' of model 'model1'.


[Run 5] Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run practica_05 at: http://localhost:9090/#/experiments/1/runs/4db25553bb884fbd89cbb06980b28a83
🧪 View experiment at: http://localhost:9090/#/experiments/1


Registered model 'model1' already exists. Creating a new version of this model...
2025/05/21 21:56:17 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: model1, version 16
Created version '16' of model 'model1'.


[Run 6] Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run practica_06 at: http://localhost:9090/#/experiments/1/runs/fa6e02a708fe4ed4bec5d6c913e5e58c
🧪 View experiment at: http://localhost:9090/#/experiments/1


Registered model 'model1' already exists. Creating a new version of this model...
2025/05/21 21:56:20 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: model1, version 17
Created version '17' of model 'model1'.


[Run 7] Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run practica_07 at: http://localhost:9090/#/experiments/1/runs/fbf059eba27f4a34b655eb63df44622f
🧪 View experiment at: http://localhost:9090/#/experiments/1


Registered model 'model1' already exists. Creating a new version of this model...
2025/05/21 21:56:22 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: model1, version 18
Created version '18' of model 'model1'.


[Run 8] Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run practica_08 at: http://localhost:9090/#/experiments/1/runs/8bc3598d362d4abb90263b682ff2b285
🧪 View experiment at: http://localhost:9090/#/experiments/1


Registered model 'model1' already exists. Creating a new version of this model...
2025/05/21 21:56:25 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: model1, version 19
Created version '19' of model 'model1'.


[Run 9] Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run practica_09 at: http://localhost:9090/#/experiments/1/runs/9d27907355e042ffb798412f17d33add
🧪 View experiment at: http://localhost:9090/#/experiments/1


Registered model 'model1' already exists. Creating a new version of this model...
2025/05/21 21:56:28 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: model1, version 20


[Run 10] Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run practica_10 at: http://localhost:9090/#/experiments/1/runs/42d4e33ad0b44013a5d8289b1aa9506f
🧪 View experiment at: http://localhost:9090/#/experiments/1


Created version '20' of model 'model1'.


In [25]:
from mlflow.tracking import MlflowClient

# List the names of the models found in the MLflow Model Registry.
# No tracking URI is passed explicitly -- presumably the client picks up the
# one configured earlier with mlflow.set_tracking_uri; confirm if run standalone.
client = MlflowClient()
models = client.search_registered_models()
for m in models:
    # Print only the registered model's name, one per line
    print(m.name)

model1
