# Example Model Registration

This notebook provides an example of using MLflow to register a model, as discussed in [https://bradleyboehmke.github.io/uc-bana-7075/07-modelops-versioning.html](https://bradleyboehmke.github.io/uc-bana-7075/07-modelops-versioning.html).

## Requirements

In [1]:
import mlflow
import mlflow.xgboost
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

from apple_data import generate_apple_sales_data_with_promo_adjustment

In [2]:
# silence some unnecessary messages caused by great expectations
import warnings
warnings.filterwarnings('ignore')

## Set experiment

In [3]:
# Select the "Forecasting Apple Demand" experiment for this notebook;
# the returned Experiment object is displayed as the cell output
mlflow.set_experiment(experiment_name="Forecasting Apple Demand")

<Experiment: artifact_location='file:///Users/b294776/Desktop/workspace/training/UC/uc-bana-7075/ModelOps/mlruns/151640156787012125', creation_time=1738854716745, experiment_id='151640156787012125', last_update_time=1738854716745, lifecycle_stage='active', name='Forecasting Apple Demand', tags={}>

## Register model from existing model run

In [4]:
# Collect the runs recorded across all experiments into one table and print it
all_runs = mlflow.search_runs(
    search_all_experiments=True,
)
print(all_runs)

                             run_id       experiment_id    status  \
0  0344c3955f7e46ed9799b2fcbd86ca77  151640156787012125  FINISHED   
1  d6f75b5268304d7489edebb9761a3a5a  151640156787012125  FINISHED   
2  c99bf33dc4ec4cc39a33313435b7ddcb  151640156787012125  FINISHED   
3  adc50abaaccf4d968c886e4227c6315f  151640156787012125  FINISHED   

                                        artifact_uri  \
0  file:///Users/b294776/Desktop/workspace/traini...   
1  file:///Users/b294776/Desktop/workspace/traini...   
2  file:///Users/b294776/Desktop/workspace/traini...   
3  file:///Users/b294776/Desktop/workspace/traini...   

                        start_time                         end_time  \
0 2025-02-06 15:13:39.086000+00:00 2025-02-06 15:13:45.082000+00:00   
1 2025-02-06 15:13:32.490000+00:00 2025-02-06 15:13:38.986000+00:00   
2 2025-02-06 15:12:09.416000+00:00 2025-02-06 15:12:15.275000+00:00   
3 2025-02-06 15:11:57.588000+00:00 2025-02-06 15:12:09.041000+00:00   

   metrics.r2  me

In [5]:
# Extract the run ID of the tuned random forest model.
# `run` is a boolean mask over the rows of `all_runs`, not a run object.
run = all_runs['tags.mlflow.runName'] == 'Random Forest Hyperparameter Tuning'
matching_runs = all_runs[run]

# Fail with an actionable message instead of the opaque
# "single positional indexer is out-of-bounds" that `.iloc[0]` raises
# when no run carries the expected name.
if matching_runs.empty:
    raise IndexError(
        "No MLflow run named 'Random Forest Hyperparameter Tuning' was found; "
        "create that run before trying to register its model."
    )

# If several runs share the name, the first row of the search result is used.
run_id = matching_runs['run_id'].iloc[0]

In [6]:
# Register the model from the selected run under the name 'apple_demand'.
# NOTE(review): the URI carries no artifact sub-path after the run ID - confirm it
# resolves to the directory where that run stored its model.
result = mlflow.register_model(
    model_uri=f'runs:/{run_id}',
    name='apple_demand',
)

Registered model 'apple_demand' already exists. Creating a new version of this model...
Created version '2' of model 'apple_demand'.


## Register model during new model run

In [7]:
# Create data: generate the apple sales data set, then separate the
# feature columns from the `demand` target
data = generate_apple_sales_data_with_promo_adjustment(base_demand=1_000, n_rows=1_000)
X = data.drop(columns=["date", "demand"])
y = data["demand"]

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Hyperparameters are declared once so that the values logged to MLflow
# cannot drift from the values the model is actually trained with
xgb_params = {"n_estimators": 100, "learning_rate": 0.1, "max_depth": 5}

# Train an XGBoost model
xgb_model = XGBRegressor(**xgb_params, random_state=42)
xgb_model.fit(X_train, y_train)

# Evaluate the model on the held-out validation set
y_pred = xgb_model.predict(X_val)
rmse = np.sqrt(mean_squared_error(y_val, y_pred))

# Log experiment details
with mlflow.start_run(run_name="XGBoost Model"):
    mlflow.log_param("model_type", "XGBoost")
    mlflow.log_params(xgb_params)
    mlflow.log_metric("rmse", rmse)
    model_info = mlflow.xgboost.log_model(xgb_model, artifact_path="model")

    # Register model programmatically.
    # The URI is taken from the value returned by `log_model`, so the registered
    # version always points at the directory the model was just written to.
    # (Previously the model was logged under "artifacts" but registered from
    # ".../model", a path the run never wrote.)
    model_uri = model_info.model_uri
    registered_model = mlflow.register_model(model_uri=model_uri, name="apple_demand")

    # Add metadata: Tags and Aliases
    client = mlflow.tracking.MlflowClient()
    # The tag is set on the registered model itself
    client.set_registered_model_tag(
        registered_model.name, "validation_status", "pending"
    )
    # The "challenger" alias is pointed at the version created above
    client.set_registered_model_alias(
        registered_model.name, "challenger", version=registered_model.version
    )

Registered model 'apple_demand' already exists. Creating a new version of this model...
Created version '3' of model 'apple_demand'.


## Query registered models

In [8]:
# Query the model registry; as the last expression in the cell, the returned
# list of registered models is shown as the cell output
mlflow.search_registered_models()

[<RegisteredModel: aliases={'challenger': '3', 'champion': '1'}, creation_timestamp=1738855073611, description='', last_updated_timestamp=1738855344360, latest_versions=[<ModelVersion: aliases=['challenger'], creation_timestamp=1738855309446, current_stage='None', description=None, last_updated_timestamp=1738855309446, name='apple_demand', run_id='2e3ee9972ee84f498468b351c0ec2ca3', run_link=None, source='file:///Users/b294776/Desktop/workspace/training/UC/uc-bana-7075/ModelOps/mlruns/151640156787012125/2e3ee9972ee84f498468b351c0ec2ca3/artifacts/model', status='READY', status_message=None, tags={'validation_status': 'pending'}, user_id=None, version=3>], name='apple_demand', tags={'validation_status': 'pending'}>]

## Computing Environment

In [9]:
import sys

# Report the interpreter version, followed by one blank line
print(f'Python version: {sys.version}\n')

# Echo the requirements file, one whitespace-stripped line per entry
with open('modelops-requirements.txt') as requirements:
    for requirement in requirements:
        print(requirement.strip())

Python version: 3.12.7 | packaged by Anaconda, Inc. | (main, Oct  4 2024, 08:28:27) [Clang 14.0.6 ]

fastapi==0.115.7
mlflow==2.12.2
numpy==1.26.4
pandas==2.1.4
python-multipart==0.0.20
scikit-learn==1.5.1
streamlit==1.37.1
uvicorn==0.34.0
xgboost==2.1.3
