In [1]:
import pandas as pd
import numpy as np
import yaml
from datetime import datetime

import mlflow
from mlflow.models import infer_signature

from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [2]:
## Helper used to evaluate the model
def eval_metrics(actual, pred):
    """Compute regression error metrics for a set of predictions.

    Args:
        actual: Ground-truth target values.
        pred: Predicted target values, aligned with ``actual``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: the square root of the mean squared
        error, the mean absolute error, and the R2 score.
    """
    mse = mean_squared_error(actual, pred)
    return np.sqrt(mse), mean_absolute_error(actual, pred), r2_score(actual, pred)

### Entrenamos y evaluamos el modelo

In [3]:
# Load the data: a simple example dataset referenced from the MLflow docs
csv_url = "https://raw.githubusercontent.com/mlflow/mlflow/master/tests/datasets/winequality-red.csv"
data = pd.read_csv(csv_url, sep=";")

# Train/test split. The split is seeded so that a fresh "Restart & Run All"
# produces the same partition (and therefore the same metrics) every time;
# without a seed train_test_split shuffles differently on each execution.
train, test = train_test_split(data, random_state=42)

# "quality" is the target column; every other column is used as a feature
train_x = train.drop(["quality"], axis=1)
test_x = test.drop(["quality"], axis=1)
train_y = train[["quality"]]
test_y = test[["quality"]]

In [4]:
# Model hyperparameters (passed to the ElasticNet constructor and logged to MLflow)
alpha, l1_ratio = 0.5, 0.5  # regularisation settings
max_iter = 1000
random_state = 42

In [5]:
# Build the ElasticNet regressor from the hyperparameters above and fit it
# on the training split
lr = ElasticNet(
    alpha=alpha,
    l1_ratio=l1_ratio,
    max_iter=max_iter,
    random_state=random_state,
)
lr.fit(X=train_x, y=train_y)

In [6]:
## Evaluate the fitted model on the held-out test split
predictions = lr.predict(test_x)
(rmse, mae, r2) = eval_metrics(actual=test_y, pred=predictions)

# Report the hyperparameters followed by the resulting scores, one per line
print(
    f"ElasticNet model (alpha={alpha}, l1_ratio={l1_ratio}):",
    f"RMSE: {rmse}, MAE: {mae}, R2: {r2}",
    sep="\n",
)

ElasticNet model (alpha=0.5, l1_ratio=0.5):
RMSE: 0.7206583671436573, MAE: 0.5917380217451235, R2: 0.14685205041667826


### Logging en MLflow

In [7]:
# Read the notebook's MLflow settings from the YAML file next to it
with open("config_mlflow.yaml", "r") as file:
    config = yaml.safe_load(file)

# MLflow settings live under the top-level "mlflow" key
mlflow_config = config["mlflow"]
tracking_uri, experiment_name, experiment_description = (
    mlflow_config[key]
    for key in ("tracking_uri", "experiment_name", "experiment_description")
)

In [9]:
# Create (if needed) and activate the experiment
mlflow.set_tracking_uri(tracking_uri)

# set_experiment creates the experiment when it does not exist yet, marks it
# as the active experiment and returns it. Activating it matters here:
# set_experiment_tag below writes to the *active* experiment, so without this
# call the description would be attached to the default experiment instead.
experiment = mlflow.set_experiment(experiment_name)

# "mlflow.note.content" is the tag the MLflow UI shows as the experiment description
mlflow.set_experiment_tag("mlflow.note.content", experiment_description)

# Tags attached to every run started from this notebook
tags = {
    "env": "test",
    "data_date": datetime.now().strftime("%Y-%m-%d"),
    "model_type": "ElasticNet",
    "experiment_description": "DEMO 2 IVAN MLFLOW ELASTICNET"
}

In [None]:
# Name the run after the current timestamp (minute resolution)
run_name = datetime.now().strftime("%Y-%m-%d_%H:%M")
with mlflow.start_run(
    experiment_id=experiment.experiment_id,
    run_name=run_name,
    tags=tags,
):
    # Log the hyperparameters used to build the model
    hyperparameters = {
        "alpha": alpha,
        "l1_ratio": l1_ratio,
        "random_state": random_state,
        "max_iter": max_iter,
    }
    for param_name, param_value in hyperparameters.items():
        mlflow.log_param(param_name, param_value)

    # Log the evaluation metrics computed on the test split
    for metric_name, metric_value in {"rmse": rmse, "mae": mae, "r2": r2}.items():
        mlflow.log_metric(metric_name, metric_value)

    # Log the model. The signature is inferred from test_x because
    # `predictions` was produced from test_x; pairing it with train_x would
    # describe inputs and outputs that come from different rows.
    signature = infer_signature(test_x, predictions)
    mlflow.sklearn.log_model(lr, "model", signature=signature)