# Model registration and versioning with MLflow

In [1]:
import os
import warnings
import sys

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from urllib.parse import urlparse
import mlflow
import mlflow.sklearn

import logging

# Module-level logger for this notebook; the root logger is configured to
# emit records at WARNING level and above.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.WARNING)

In [2]:
def eval_metrics(actual, pred):
    """Score regression predictions against the observed targets.

    Args:
        actual: Observed target values.
        pred: Predicted values to compare with ``actual``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: the square root of the mean squared
        error, the mean absolute error, and the R^2 score.
    """
    return (
        np.sqrt(mean_squared_error(actual, pred)),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )


In [13]:
import mlflow

# Send runs and model registrations to the MLflow tracking server at
# localhost:5000 (assumed to be running already, e.g. via `mlflow server`).
#
# Alternative kept for reference: track to a local directory instead.
#   mlflow.set_tracking_uri('3_Model versioning and registration with MLFlow/mlruns1')
mlflow.set_tracking_uri("http://localhost:5000")


In [14]:
import mlflow
from mlflow.tracking import MlflowClient
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
import warnings
import logging

# Suppress library warnings in the cell output and fix NumPy's global seed.
warnings.filterwarnings("ignore")
np.random.seed(40)

# Initialize logger
logger = logging.getLogger(__name__)

# Read the wine-quality csv file from the URL (semicolon-separated).
csv_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
try:
    data = pd.read_csv(csv_url, sep=";")
except Exception as e:
    # Log the failure (with traceback) and re-raise. Calling exit() inside a
    # notebook terminates the kernel rather than reporting the error, and
    # continuing would leave `data` undefined for the following steps.
    logger.exception("Unable to download CSV. Error: %s", e)
    raise

# Hold out 25% of the rows for evaluation; the fixed random_state makes the
# split repeatable.
train, test = train_test_split(data, random_state=42, test_size=0.25)

# "quality" is the column being predicted; all remaining columns are features.
train_x, train_y = train.drop(columns=["quality"]), train[["quality"]]
test_x, test_y = test.drop(columns=["quality"]), test[["quality"]]

# ElasticNet hyperparameters
alpha, l1_ratio = 0.5, 0.5

# Look the experiment up by name through the tracking client and create it
# when the lookup returns nothing.
client = MlflowClient()
experiment_name = "WineQualityExperiment"
experiment = client.get_experiment_by_name(experiment_name)

experiment_id = (
    experiment.experiment_id
    if experiment
    else client.create_experiment(experiment_name)
)

with mlflow.start_run(experiment_id=experiment_id):
    # Fit the ElasticNet regressor on the training split.
    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42).fit(train_x, train_y)

    # Score the held-out split with the eval_metrics helper defined earlier
    # in this notebook.
    predicted_qualities = lr.predict(test_x)
    rmse, mae, r2 = eval_metrics(test_y, predicted_qualities)

    print(f"ElasticNet model (alpha={alpha}, l1_ratio={l1_ratio}):")
    print(f"  RMSE: {rmse}")
    print(f"  MAE: {mae}")
    print(f"  R2: {r2}")

    # Record the hyperparameters and evaluation metrics on the active run.
    for param_name, param_value in {"alpha": alpha, "l1_ratio": l1_ratio}.items():
        mlflow.log_param(param_name, param_value)
    for metric_name, metric_value in {"rmse": rmse, "r2": r2, "mae": mae}.items():
        mlflow.log_metric(metric_name, metric_value)

    # Log the fitted model under "model" and register it under the given
    # registered-model name.
    mlflow.sklearn.log_model(lr, "model", registered_model_name="ElasticnetWineModel")


ElasticNet model (alpha=0.5, l1_ratio=0.5):
  RMSE: 0.7436470916334205
  MAE: 0.6042761768399744
  R2: 0.10601910075094556


Successfully registered model 'ElasticnetWineModel'.
2023/12/09 14:57:17 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: ElasticnetWineModel, version 1
Created version '1' of model 'ElasticnetWineModel'.


In [15]:
from IPython.display import IFrame

# MLflow UI URL: reuse the tracking URI configured earlier in this notebook
# (an http:// server address) so the embedded UI cannot drift from the server
# the runs were logged to.
mlflow_ui_url = mlflow.get_tracking_uri()

# Display the MLflow UI in an iframe
# Set width to 100% to use the full width of the notebook
# Adjust height as needed, and make the frame scrollable if the content is longer
IFrame(mlflow_ui_url, width="100%", height=400, scrolling="yes")