# Test MLflow Integration

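- start (create or reuse) an MLflow experiment
- train ElasticNet models on the wine-quality dataset
- save metrics (rmse, r2, mae)
- save and register the model artifact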

In [None]:
# Install the notebook's dependencies from requirements.txt.
# Please check the requirements.in file for more details
!pip install -r requirements.txt

In [None]:
import os
import warnings

import pandas as pd
import mlflow
import numpy as np

from minio import Minio
from mlflow.models.signature import infer_signature
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from tenacity import retry, stop_after_attempt, wait_exponential

# suppress warnings: installs a blanket "ignore" filter, so every warning
# category is hidden for the rest of this notebook session
warnings.filterwarnings("ignore")

In [None]:
# Red-wine quality dataset hosted by the UCI repository; the file uses ";"
# as its field separator.
WINE_QUALITY_CSV_URL = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"

data = pd.read_csv(WINE_QUALITY_CSV_URL, sep=";")
# Preview the first rows as the cell output.
data.head()

In [None]:
# Display the (rows, columns) dimensions of the loaded DataFrame.
data.shape

In [None]:
# Column the models are trained to predict; every other column is a feature.
TARGET_COLUMN = "quality"

# Seed the shuffle so the split (and therefore the logged metrics) is the same
# on every execution, matching the fixed random_state used for the model.
train, test = train_test_split(data, random_state=42)

# Features: everything except the target column.
train_x = train.drop([TARGET_COLUMN], axis=1)
test_x = test.drop([TARGET_COLUMN], axis=1)
# Targets are kept as single-column DataFrames (double brackets).
train_y = train[[TARGET_COLUMN]]
test_y = test[[TARGET_COLUMN]]

In [None]:
# MinIO endpoint in bare "host[:port]" form. Drop an optional URL scheme
# ("http://", "https://", ...) and any trailing slash; a URL without a scheme
# is used as-is instead of raising IndexError.
MINIO_HOST = os.environ["MINIO_ENDPOINT_URL"].split("://", 1)[-1].rstrip("/")
# Name of the bucket this notebook makes sure exists in MinIO.
MINIO_BUCKET = "mlflow"

In [None]:
# Initialize a MinIO client
mc = Minio(
    endpoint=MINIO_HOST,
    access_key=os.environ["AWS_ACCESS_KEY_ID"],
    secret_key=os.environ["AWS_SECRET_ACCESS_KEY"],
    # Use TLS only when the configured endpoint URL has the https scheme,
    # rather than always forcing a plaintext connection.
    secure=os.environ["MINIO_ENDPOINT_URL"].lower().startswith("https://"),
)

# Create bucket if it doesn't exist
if not mc.bucket_exists(MINIO_BUCKET):
    mc.make_bucket(MINIO_BUCKET)
    print(f"Created bucket {MINIO_BUCKET}")
else:
    print(f"Bucket {MINIO_BUCKET} already exists!")

In [None]:
wine_experiment_name = "My Wine Experiment"

# Look the experiment up by name first so an existing one is reused rather
# than created again. The lookup result is deliberately NOT called
# `experiment`: that name belongs to the experiment() function defined later
# in this notebook, and re-running this cell must not overwrite it.
existing_experiment = mlflow.get_experiment_by_name(wine_experiment_name)
experiment_id = (
    mlflow.create_experiment(name=wine_experiment_name)
    if existing_experiment is None
    else existing_experiment.experiment_id
)

In [None]:
# check that the experiment was created successfully: fetch it back by id
# and compare the stored name with the one requested
registered_name = mlflow.get_experiment(experiment_id).name
assert registered_name == wine_experiment_name, f"Failed to create experiment {wine_experiment_name}!"

In [None]:
def experiment(alpha, l1_ratio):
    """Train one ElasticNet model inside an MLflow run and return that run.

    The training/test frames (``train_x``, ``train_y``, ``test_x``,
    ``test_y``) and ``experiment_id`` are read from the notebook's globals.

    Args:
        alpha: Passed to ``ElasticNet(alpha=...)``.
        l1_ratio: Passed to ``ElasticNet(l1_ratio=...)``.

    Returns:
        The run object yielded by ``mlflow.start_run``.
    """
    mlflow.sklearn.autolog()
    with mlflow.start_run(
        experiment_id=experiment_id, run_name="wine_models"
    ) as active_run:
        mlflow.set_tag("author", "kf-testing")

        regressor = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        regressor.fit(train_x, train_y)
        predictions = regressor.predict(test_x)

        # Evaluate on the held-out split and log each score under its own key.
        scores = {
            "rmse": np.sqrt(mean_squared_error(test_y, predictions)),
            "r2": r2_score(test_y, predictions),
            "mae": mean_absolute_error(test_y, predictions),
        }
        for metric_name, metric_value in scores.items():
            mlflow.log_metric(metric_name, metric_value)

        # Log the fitted model under "model" and register it, attaching the
        # input/output signature inferred from the test data and predictions.
        mlflow.sklearn.log_model(
            regressor,
            "model",
            registered_model_name="wine-elasticnet",
            signature=infer_signature(test_x, predictions),
        )

    return active_run

In [None]:
# run experiments: one MLflow run per (alpha, l1_ratio) pair, in this order
runs = [
    experiment(alpha, l1_ratio)
    for alpha, l1_ratio in ((0.5, 0.5), (1, 0), (0, 1))
]

In [None]:
@retry(
    stop=stop_after_attempt(30),
    wait=wait_exponential(multiplier=2, min=1, max=10),
    reraise=True,
)
def assert_run_finished(client, run_id):
    """Assert that the run's status is ``"FINISHED"``.

    The ``@retry`` decorator re-invokes this check whenever it raises, for up
    to 30 attempts with an exponential wait bounded between 1 and 10; with
    ``reraise=True`` the last ``AssertionError`` is what the caller sees.

    Args:
        client: Object exposing ``get_run(run_id)``.
        run_id: Identifier of the run to inspect.
    """
    fetched_run = client.get_run(run_id)
    current_status = fetched_run.info.status
    assert current_status == "FINISHED", f"MLFlow run in {current_status} state."


def assert_has_metrics(client, run_id, metrics):
    """Check that every name in ``metrics`` was logged for the given run.

    Args:
        client: Object exposing ``get_run(run_id)``.
        run_id: Identifier of the run to inspect.
        metrics: Iterable of metric names expected in ``run.data.metrics``.

    Raises:
        AssertionError: Naming the first expected metric that is missing.
    """
    logged = client.get_run(run_id).data.metrics
    missing = [name for name in metrics if name not in logged]
    # Report only the first absent metric, in the order the caller listed them.
    assert not missing, f"Metric {missing[0]} not found in logged data!"


def assert_model(client, run_id):
    """Load the run's logged model and assert it is an ``ElasticNet``.

    Args:
        client: Object exposing ``sklearn.load_model(uri)``.
        run_id: Identifier of the run whose ``model`` artifact is loaded.
    """
    model_uri = f"runs:/{run_id}/model"
    loaded = client.sklearn.load_model(model_uri)
    assert isinstance(loaded, ElasticNet), f"Model {loaded} is not of type ElasticNet!"

In [None]:
# Metric names each run is expected to contain.
METRICS = ["rmse", "r2", "mae"]

# Verify every run in turn: status first, then metrics, then the model.
# The `mlflow` module itself is passed as the client object.
for run in runs:
    run_id = run.info.run_id
    assert_run_finished(client=mlflow, run_id=run_id)
    assert_has_metrics(client=mlflow, run_id=run_id, metrics=METRICS)
    assert_model(client=mlflow, run_id=run_id)