In [1]:
import mlflow
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sqlalchemy import create_engine
import os

# --- 1. Service connections ---
# Endpoints and credentials are read from environment variables so that real
# secrets never have to be committed with the notebook. The fallbacks are the
# local development values this notebook originally hard-coded, so behaviour
# is unchanged when no variable is set.
mlflow.set_tracking_uri(
    os.environ.get("MLFLOW_TRACKING_URI", "http://host.docker.internal:5000")
)

# PostgreSQL connection used to store the dataset tables
db_user = os.environ.get("DB_USER", "mlflow_user")
db_pass = os.environ.get("DB_PASS", "mlflow_pass")
db_host = os.environ.get("DB_HOST", "host.docker.internal")
db_port = os.environ.get("DB_PORT", "5432")
db_name = os.environ.get("DB_NAME", "mlflow_db")
# create_engine() is lazy: no connection is opened until the engine is first
# used, so the message below does not prove the database is reachable.
engine = create_engine(f'postgresql://{db_user}:{db_pass}@{db_host}:{db_port}/{db_name}')
print("Conexiones establecidas.")

# --- 2. Load the data and store it in the DB ---
url = "https://raw.githubusercontent.com/allisonhorst/palmerpenguins/main/inst/extdata/penguins.csv"
# Download the CSV and discard every row containing a missing value in one
# chained expression.
penguins_df = pd.read_csv(url).dropna()
# The table is recreated on every run (if_exists='replace'). Note that it
# holds the rows that survived dropna(), not the untouched download.
penguins_df.to_sql(
    name='raw_penguins_data',
    con=engine,
    if_exists='replace',
    index=False,
)
print("Datos crudos guardados en la tabla 'raw_penguins_data'.")

# --- 3. Data processing ---
features = ['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']
target = 'species'

# Keep only the four numeric model inputs plus the label column, and persist
# that projection to its own table (recreated on every run).
df_processed = penguins_df[[*features, target]]
df_processed.to_sql(
    name='processed_penguins_data',
    con=engine,
    if_exists='replace',
    index=False,
)
print("Datos procesados guardados en la tabla 'processed_penguins_data'.")

# Separate inputs from labels, then hold out 20% of the rows for evaluation.
# The fixed random_state keeps the split identical between runs.
X, y = df_processed[features], df_processed[target]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- 4. Experimentation and MLflow tracking ---
# Runs a small random search over RandomForest hyperparameters, logging the
# parameters and the hold-out accuracy of every run to MLflow.
experiment_name = "Penguin_Species_Classifier"
mlflow.set_experiment(experiment_name)
registered_model_name = "PenguinClassifierModel"

n_runs = 20
# Dedicated seeded generator: the hyperparameter draws are now reproducible
# across kernel restarts (the split and the forest were already seeded, the
# search itself was not) without touching the global `random` state.
hyperparam_rng = random.Random(42)

for i in range(n_runs):
    with mlflow.start_run(run_name=f"run_{i+1}") as run:
        # Draw order is fixed so the seeded sequence maps to the same
        # hyperparameters on every execution.
        n_estimators = hyperparam_rng.randint(10, 200)
        max_depth = hyperparam_rng.randint(5, 20)
        min_samples_split = hyperparam_rng.randint(2, 10)
        params = {
            "n_estimators": n_estimators,
            "max_depth": max_depth,
            "min_samples_split": min_samples_split,
        }
        # Log all hyperparameters of the run in a single call.
        mlflow.log_params(params)

        model = RandomForestClassifier(random_state=42, **params)
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)
        acc = accuracy_score(y_test, predictions)
        mlflow.log_metric("accuracy", acc)
        print(f"Run {i+1}: Accuracy = {acc:.4f}")

        # Only the first run's model is logged as an artifact and registered,
        # which creates exactly one new model version per execution.
        # NOTE(review): run 1 is registered regardless of its accuracy;
        # confirm whether the best-scoring run should be registered instead.
        if i == 0:
            mlflow.sklearn.log_model(
                sk_model=model,
                artifact_path="model",
                registered_model_name=registered_model_name,
            )
print("¡Experimentación completada! Revisa la UI de MLflow en http://localhost:5000")

Conexiones establecidas.
Datos crudos guardados en la tabla 'raw_penguins_data'.
Datos procesados guardados en la tabla 'processed_penguins_data'.




Run 1: Accuracy = 0.9701


Successfully registered model 'PenguinClassifierModel'.
2025/09/22 04:35:45 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: PenguinClassifierModel, version 1
Created version '1' of model 'PenguinClassifierModel'.


🏃 View run run_1 at: http://host.docker.internal:5000/#/experiments/1/runs/83899f9179b74e2e8a9b94eada9b0096
🧪 View experiment at: http://host.docker.internal:5000/#/experiments/1
Run 2: Accuracy = 0.9701
🏃 View run run_2 at: http://host.docker.internal:5000/#/experiments/1/runs/defee20161f24ea19443edcbde8d02ee
🧪 View experiment at: http://host.docker.internal:5000/#/experiments/1
Run 3: Accuracy = 0.9701
🏃 View run run_3 at: http://host.docker.internal:5000/#/experiments/1/runs/b96a481f0b7745b3819ca1d0cb3db3c3
🧪 View experiment at: http://host.docker.internal:5000/#/experiments/1
Run 4: Accuracy = 0.9851
🏃 View run run_4 at: http://host.docker.internal:5000/#/experiments/1/runs/2fae1db003614ef5b063757f1cfdb918
🧪 View experiment at: http://host.docker.internal:5000/#/experiments/1
Run 5: Accuracy = 0.9851
🏃 View run run_5 at: http://host.docker.internal:5000/#/experiments/1/runs/ec86e3e424bc4d2bbe8378f41b25f150
🧪 View experiment at: http://host.docker.internal:5000/#/experiments/1
Run 6

In [2]:
# Print the installed scikit-learn version.
# NOTE(review): presumably recorded so the environment that trained the
# registered model can be reproduced later; confirm.
import sklearn
print(sklearn.__version__)

1.3.1
