# Example Model Registration

This notebook provides an example of using MLFlow to register a model as discussed in [https://bradleyboehmke.github.io/uc-bana-7075/07-modelops-versioning.html](https://bradleyboehmke.github.io/uc-bana-7075/07-modelops-versioning.html).

## Requirements

In [1]:
import mlflow
import mlflow.xgboost
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

from apple_data import generate_apple_sales_data_with_promo_adjustment

In [2]:
# silence some unnecessary messages caused by great expectations
import warnings
warnings.filterwarnings('ignore')

## Set experiment

In [3]:
# Set experiment name
mlflow.set_experiment("Forecasting Apple Demand")

<Experiment: artifact_location='file:///Users/b294776/Desktop/workspace/training/UC/uc-bana-7075/ModelOps/mlruns/186119791991456899', creation_time=1737915596015, experiment_id='186119791991456899', last_update_time=1737915596015, lifecycle_stage='active', name='Forecasting Apple Demand', tags={}>

## Register model from existing model run

In [4]:
# check out the existing model runs
all_runs = mlflow.search_runs(search_all_experiments=True)
print(all_runs)

                             run_id       experiment_id    status  \
0  1d4f349e3edc481685c691ef773dfb7e  186119791991456899  FINISHED   
1  9729fd97ce074ee2862cc32e1089c513  186119791991456899  FINISHED   
2  3a460bec71464493bdc4da5ec9537796  186119791991456899  FINISHED   
3  dfe70656a2f64c11b35078e288f962ab  186119791991456899  FINISHED   

                                        artifact_uri  \
0  file:///Users/b294776/Desktop/workspace/traini...   
1  file:///Users/b294776/Desktop/workspace/traini...   
2  file:///Users/b294776/Desktop/workspace/traini...   
3  file:///Users/b294776/Desktop/workspace/traini...   

                        start_time                         end_time  \
0 2025-01-26 18:21:05.944000+00:00 2025-01-26 18:21:11.317000+00:00   
1 2025-01-26 18:21:00.330000+00:00 2025-01-26 18:21:05.873000+00:00   
2 2025-01-26 18:20:01.595000+00:00 2025-01-26 18:20:04.736000+00:00   
3 2025-01-26 18:19:56.389000+00:00 2025-01-26 18:20:01.247000+00:00   

   metrics.rmse  

In [5]:
# extract model run ID for tuned random forest model
run = all_runs['tags.mlflow.runName'] == 'Random Forest Hyperparameter Tuning'
run_id = all_runs[run]['run_id'].iloc[0]

In [6]:
# register this model
result = mlflow.register_model(f'runs:/{run_id}', 'apple_demand')

Registered model 'apple_demand' already exists. Creating a new version of this model...
Created version '2' of model 'apple_demand'.


## Register model during new model run

In [7]:
# Create data
data = generate_apple_sales_data_with_promo_adjustment(base_demand=1_000, n_rows=1_000)
X = data.drop(columns=["date", "demand"])
y = data["demand"]

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Train an XGBoost model
xgb_model = XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42)
xgb_model.fit(X_train, y_train)

# Evaluate the model
y_pred = xgb_model.predict(X_val)
rmse = np.sqrt(mean_squared_error(y_val, y_pred))

# Log experiment details
with mlflow.start_run(run_name="XGBoost Model"):
    mlflow.log_param("model_type", "XGBoost")
    mlflow.log_param("n_estimators", 100)
    mlflow.log_param("learning_rate", 0.1)
    mlflow.log_param("max_depth", 5)
    mlflow.log_metric("rmse", rmse)
    mlflow.xgboost.log_model(xgb_model, artifact_path="artifacts")

    # Register model programmatically
    active_run = mlflow.active_run().info.run_id
    model_uri = f"runs:/{active_run}/model"
    registered_model = mlflow.register_model(model_uri=model_uri, name="apple_demand")

    # Add metadata: Tags and Aliases
    client = mlflow.tracking.MlflowClient()
    client.set_registered_model_tag(
        registered_model.name, "validation_status", "pending"
    )
    client.set_registered_model_alias(
        registered_model.name, "challenger", version=registered_model.version
    )

Registered model 'apple_demand' already exists. Creating a new version of this model...
Created version '3' of model 'apple_demand'.


## Query registered models

In [8]:
mlflow.search_registered_models()

[<RegisteredModel: aliases={'challenger': '3', 'champion': '1'}, creation_timestamp=1737916057537, description='', last_updated_timestamp=1737916154049, latest_versions=[<ModelVersion: aliases=['challenger'], creation_timestamp=1737916154031, current_stage='None', description=None, last_updated_timestamp=1737916154031, name='apple_demand', run_id='021eea67d5ea4be5967ca90efe8866db', run_link=None, source='file:///Users/b294776/Desktop/workspace/training/UC/uc-bana-7075/ModelOps/mlruns/186119791991456899/021eea67d5ea4be5967ca90efe8866db/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>], name='apple_demand', tags={'validation_status': 'pending'}>]

## Computing Environment

In [9]:
import sys

print(f'Python version: {sys.version}', end='\n\n')

with open('modelops-requirements.txt', 'r') as file:
    for line in file:
        print(line.strip())

Python version: 3.12.7 | packaged by Anaconda, Inc. | (main, Oct  4 2024, 08:28:27) [Clang 14.0.6 ]

mlflow==2.12.2
numpy==1.26.4
pandas==2.1.4
scikit-learn==1.5.1
xgboost==2.1.3
