### ENV VARS export

In [226]:
# For this to work, every one of our scripts must export the following environment variables.
# NOTE(review): these look like local development credentials pointing at localhost — confirm
# they are never reused outside a local setup; prefer loading them from the environment.
%env AWS_ACCESS_KEY_ID=minio   
%env AWS_SECRET_ACCESS_KEY=minio123 
%env MLFLOW_S3_ENDPOINT_URL=http://localhost:9000
%env AWS_ENDPOINT_URL_S3=http://localhost:9000

env: AWS_ACCESS_KEY_ID=minio
env: AWS_SECRET_ACCESS_KEY=minio123
env: MLFLOW_S3_ENDPOINT_URL=http://localhost:9000
env: AWS_ENDPOINT_URL_S3=http://localhost:9000


### Carga de utilidades

In [227]:
import sys
import os
# Make the parent of the working directory importable so project packages
# located there can be imported. The membership check keeps the cell
# idempotent: re-running it does not pile up duplicate entries in sys.path.
_parent_dir = os.path.abspath(os.path.join(os.getcwd(), '../'))
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

In [228]:
from utilities.scripts.procesamiento import procesar_dataframe_completo

### Carga del dataset

***NOTA***: por el momento, el dataset se lee y procesa *localmente*. Una vez configurado el resto de los servicios (*Airflow*, *FastAPI*, etc.) se leerá y procesará mediante docker containers.

In [229]:
import pandas as pd
import numpy as np

In [230]:
# Read the CSV under ./datasets (relative to the working directory) into a DataFrame.
body_fat_dataset = pd.read_csv(filepath_or_buffer="./datasets/bodyfat.csv")

In [231]:
# Settings handed to procesar_dataframe_completo when the dataset is processed.
random_state = 323_123  # forwarded as `random_state`
test_size = 0.3         # forwarded as `test_size`
target = "BodyFat"      # forwarded as `target`

In [232]:
# Run the project's processing helper on the loaded dataset; its result is
# unpacked into the four train/test pieces used by the rest of the notebook.
X_train, X_test, y_train, y_test = procesar_dataframe_completo(
    dataframe=body_fat_dataset,
    target=target,
    test_size=test_size,
    random_state=random_state,
)

### MLflow playground

In [233]:
import mlflow

#### MLflow - configuración

In [234]:
# MLflow settings: tags attached to the experiment, the experiment name, and
# the address of the tracking server.
TEAM_NAME = "mlops1-fiuba"
PROJECT_NAME = "body_fat"
EXPERIMENT_NAME = "body_fat_regression_model"
TRACKING_URL = "http://localhost:5001"

In [235]:
# Point MLflow at the tracking server first; the client below is created
# without an explicit URI, so this call has to come before it.
mlflow.set_tracking_uri(TRACKING_URL)
# Client handle used further down for the model-registry calls.
client = mlflow.MlflowClient()

In [236]:
experiment_name = EXPERIMENT_NAME

# Look the experiment up by name; when nothing comes back, create it tagged
# with project and team, then fetch the newly created record.
experiment = mlflow.get_experiment_by_name(experiment_name)
if not experiment:
    mlflow.create_experiment(
        name=experiment_name,
        tags={"project": PROJECT_NAME, "team": TEAM_NAME},
    )
    experiment = mlflow.get_experiment_by_name(experiment_name)

### MLflow - Logueo de modelos - experimentación

In [246]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error, r2_score, root_mean_squared_error

In [247]:
# Armamos los modelos base
model = DecisionTreeRegressor()

# Entrenamos el modelo
model.fit(X_train, y_train)

0,1,2
,criterion,'squared_error'
,splitter,'best'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [248]:
# Names used when logging: the MLflow run name and the name under which the
# model is logged inside that run.
RUN_NAME = "decision_tree_regressor"
MODEL_NAME = "body_fat_decision_tree_regressor"

In [249]:
# Log the model, its parameters and its test metrics inside one MLflow run.
# The context manager ends the run on exit and marks it FAILED when the body
# raises, so a broken run is not recorded as finished and the error surfaces
# in the notebook instead of being reduced to a printed message.
with mlflow.start_run(experiment_id=experiment.experiment_id,
                      run_name=RUN_NAME,
                      tags={"model": "decision_tree_regressor"}):

    # Log the model hyperparameters
    mlflow.log_params(model.get_params())

    y_pred = model.predict(X_test)

    # Each metric is a plain scalar (no trailing commas, so no tuple wrapping).
    mae = mean_absolute_error(y_test, y_pred)
    rmse = root_mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Log the model metrics to MLflow
    metrics = {
        "MAE": mae,
        "RMSE": rmse,
        "r2": r2
    }

    print(metrics)
    mlflow.log_metrics(metrics)

    # Log the fitted model together with a one-row input example
    mlflow.sklearn.log_model(model,
                             MODEL_NAME,
                             input_example=X_test[0:1])

{'MAE': 4.798684210526316, 'RMSE': 5.652840459729772, 'r2': 0.6142297762139428}


Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 301.19it/s]  


🏃 View run decision_tree_regressor at: http://localhost:5001/#/experiments/1/runs/d4cb768ae28f4330942939d049952069
🧪 View experiment at: http://localhost:5001/#/experiments/1


### MLflow - creación de modelo productivo

In [None]:
# Registry entry for the production model: human-readable description and
# the registered-model name.
PROD_MODEL_DESCRIPTION = "Modelo que permite predecir el 'body fat' de un paciente."
PROD_MODEL_NAME = "body_fat_productive"

In [268]:
# Leemos el modelo
try:
    registered_model = client.get_registered_model(name=PROD_MODEL_NAME)
except Exception as e:
    print(f"An unexpected error occurred: {e}")
    registered_model = None

# SI no existe el modelo, lo creamos
if registered_model is None:
    registered_model = client.create_registered_model(name=PROD_MODEL_NAME, description=PROD_MODEL_DESCRIPTION)

In [252]:
# Carga del modelo
model_runs = mlflow.search_runs(
            experiment_ids=experiment.experiment_id,
            filter_string=f"tags.mlflow.runName = '{RUN_NAME}'"   
        )

run_id_dt_reg = model_runs['run_id'].iloc[0]
uri_dt_reg = f"runs:/{run_id_dt_reg}/{MODEL_NAME}" 
print(uri_dt_reg)
model_dt_reg = mlflow.sklearn.load_model(uri_dt_reg)

runs:/d4cb768ae28f4330942939d049952069/body_fat_decision_tree_regressor


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 256.02it/s]


In [260]:
# Register the logged model as a new version of the production model. The
# description names the actual estimator (a decision tree regressor); the
# previous text described a logistic regression, which this model is not.
result_dt_reg = client.create_model_version(
    name=PROD_MODEL_NAME,
    source=uri_dt_reg,
    run_id=run_id_dt_reg,
    description="Regresor de árbol de decisión"
)

# Point the "champion" alias at the version that was just created.
client.set_registered_model_alias(PROD_MODEL_NAME, "champion", result_dt_reg.version)

2025/09/22 22:20:30 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: body_fat_productive, version 3
