# Stage 2 - Lesson 3: Model Registry with Aliases

This notebook introduces the MLflow Model Registry using the latest approach with model aliases instead of deprecated stages.

**Learning Objectives:**
1. Understand model versioning
2. Learn to register models in MLflow
3. Manage model deployments using aliases
4. Organize models with tags

In [1]:
# Import required libraries
import os
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt

# MLflow imports
import mlflow
import mlflow.xgboost
from mlflow.tracking import MlflowClient

# Model imports
import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score

# Set random seed for reproducibility
np.random.seed(42)

## 1. Connect to MLflow Tracking Server

First, we'll connect to our MLflow server and set up our experiment.

In [2]:
# Set MLflow tracking URI - using local server
mlflow.set_tracking_uri("http://localhost:5000")

# Set the experiment
experiment_name = "google_stock_model_registry"
mlflow.set_experiment(experiment_name)

# Initialize MLflow client
client = MlflowClient()

print(f"Connected to MLflow tracking server")
print(f"Using experiment: {experiment_name}")

Connected to MLflow tracking server
Using experiment: google_stock_model_registry


## 2. Load Data

We'll load our historical data that was prepared in previous lessons.

In [3]:
# Load historical data
data_path = '../../data/mlops_stages/historical_features.csv'
df = pd.read_csv(data_path)
df['Date'] = pd.to_datetime(df['Date'])
print(f"Loaded data with {df.shape[0]} rows and {df.shape[1]} columns")
df.head(3)

Loaded data with 802 rows and 305 columns


Unnamed: 0,Date,Adj Close,Volume,Volume_lag_1,Volume_lag_2,Volume_lag_3,Volume_lag_4,Volume_lag_5,Volume_lag_6,Volume_lag_7,...,Volume_lag_293,Volume_lag_294,Volume_lag_295,Volume_lag_296,Volume_lag_297,Volume_lag_298,Volume_lag_299,Volume_lag_300,Volume_rolling_mean_3,target
0,2021-06-16 00:00:00+00:00,120.980934,21562000.0,22098000.0,21958000.0,25904000.0,17942000.0,24240000.0,24120000.0,24458000.0,...,57896000.0,42236000.0,63358000.0,38702000.0,54028000.0,40334000.0,61620000.0,63320000.0,23320000.0,120.772499
1,2021-06-17 00:00:00+00:00,120.336266,26472000.0,21562000.0,22098000.0,21958000.0,25904000.0,17942000.0,24240000.0,24120000.0,...,51050000.0,57896000.0,42236000.0,63358000.0,38702000.0,54028000.0,40334000.0,61620000.0,21872670.0,121.7435
2,2021-06-18 00:00:00+00:00,121.303764,25842000.0,26472000.0,21562000.0,22098000.0,21958000.0,25904000.0,17942000.0,24240000.0,...,35292000.0,51050000.0,57896000.0,42236000.0,63358000.0,38702000.0,54028000.0,40334000.0,23377330.0,120.111


## 3. Prepare Data for Training

We'll split our data into training and test sets, respecting the time series nature.

In [4]:
def time_based_split(df, test_size=0.2):
    """Split time series data respecting temporal order"""
    # Sort by date
    df = df.sort_values('Date')
    
    # Calculate split index
    split_idx = int(len(df) * (1 - test_size))
    
    # Split the data
    train_df = df.iloc[:split_idx].copy()
    test_df = df.iloc[split_idx:].copy()
    
    return train_df, test_df

# Split the data
train_df, test_df = time_based_split(df)

print(f"Training data: {train_df.shape[0]} rows from {train_df['Date'].min()} to {train_df['Date'].max()}")
print(f"Test data: {test_df.shape[0]} rows from {test_df['Date'].min()} to {test_df['Date'].max()}")

Training data: 641 rows from 2021-06-16 00:00:00+00:00 to 2024-01-02 00:00:00+00:00
Test data: 161 rows from 2024-01-03 00:00:00+00:00 to 2024-08-22 00:00:00+00:00


## 4. Prepare Features and Target

Let's separate our features and target variable.

In [5]:
def prepare_features_target(df):
    """Prepare features and target"""
    # Exclude Date and target columns
    feature_cols = [col for col in df.columns if col not in ['Date', 'target']]
    
    # Extract features and target
    X = df[feature_cols]
    y = df['target']
    
    return X, y, feature_cols

# Prepare data for modeling
X_train, y_train, features = prepare_features_target(train_df)
X_test, y_test, _ = prepare_features_target(test_df)

print(f"Using {len(features)} features")

Using 303 features


## 5. Train and Register a Model

Now we'll train a model and register it in the MLflow Model Registry.

In [None]:
# Model parameters
params = {
    'objective': 'reg:squarederror',
    'learning_rate': 0.1,
    'max_depth': 6,
    'n_estimators': 100,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'seed': 42
}

# Model name in registry
model_name = "google_stock_predictor"

# Start MLflow run
with mlflow.start_run(run_name="registry_model_v1") as run:
    # Log parameters
    mlflow.log_params(params)
    
    # Set tags
    mlflow.set_tag("model_type", "xgboost")
    mlflow.set_tag("data_version", "v1")
    mlflow.set_tag("author", "student")
    
    # Train model
    model = xgb.XGBRegressor(**params)
    model.fit(X_train, y_train)
    
    # Make predictions
    y_pred = model.predict(X_test)
    
    # Calculate metrics
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    
    # Log metrics
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2", r2)
    from mlflow.models.signature import infer_signature
    signature = infer_signature(X_train, y_train_pred)
    
    # Provide input example
    input_example = X_train[:4]
    # Log model to registry
    mlflow.xgboost.log_model(
        model,
        "xgboost_model",
        registered_model_name=model_name.
        signature=signature,
        input_example=input_example,
        model_format="json"
    )
    
    run_id = run.info.run_id
    print(f"Model training complete. Run ID: {run_id}")
    print(f"Test RMSE: ${rmse:.2f}")
    print(f"Test R²: {r2:.4f}")

Successfully registered model 'google_stock_predictor'.
2025/02/19 15:09:55 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: google_stock_predictor, version 1


Model training complete. Run ID: 9cb02419c710421d8acb26d2225750bd
Test RMSE: $22.48
Test R²: -1.0292
🏃 View run registry_model_v1 at: http://localhost:5000/#/experiments/457499682337239906/runs/9cb02419c710421d8acb26d2225750bd
🧪 View experiment at: http://localhost:5000/#/experiments/457499682337239906


Created version '1' of model 'google_stock_predictor'.


## 6. Manage Model Versions

Now let's train another model with different parameters and register it as a new version.

In [None]:
# Updated parameters for second model
params_v2 = {
    'objective': 'reg:squarederror',
    'learning_rate': 0.05,
    'max_depth': 8,
    'n_estimators': 200,
    'subsample': 0.7,
    'colsample_bytree': 0.7,
    'seed': 42
}

# Train and register second model
with mlflow.start_run(run_name="registry_model_v2") as run:
    # Log parameters
    mlflow.log_params(params_v2)
    
    # Set tags
    mlflow.set_tag("model_type", "xgboost")
    mlflow.set_tag("data_version", "v1")
    mlflow.set_tag("improvement", "deeper_trees")
    
    # Train model
    model_v2 = xgb.XGBRegressor(**params_v2)
    model_v2.fit(X_train, y_train)
    
    # Make predictions
    y_pred = model_v2.predict(X_test)
    
    # Calculate metrics
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    
    # Log metrics
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2", r2)
    
    # Log model to registry (same name creates new version)
    from mlflow.models.signature import infer_signature
    signature = infer_signature(X_train, y_train_pred)
    
    # Provide input example
    input_example = X_train[:4]
    mlflow.xgboost.log_model(
        model_v2,
        "xgboost_model",
        registered_model_name=model_name,
        signature=signature,
        input_example=input_example,
        model_format="json"
    )
    
    run_id = run.info.run_id
    print(f"Model v2 training complete. Run ID: {run_id}")
    print(f"Test RMSE: ${rmse:.2f}")
    print(f"Test R²: {r2:.4f}")

Registered model 'google_stock_predictor' already exists. Creating a new version of this model...
2025/02/19 15:10:13 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: google_stock_predictor, version 2
Created version '2' of model 'google_stock_predictor'.


Model v2 training complete. Run ID: 2a0104b4dd5049dfbc947221feec732a
Test RMSE: $23.00
Test R²: -1.1245
🏃 View run registry_model_v2 at: http://localhost:5000/#/experiments/457499682337239906/runs/2a0104b4dd5049dfbc947221feec732a
🧪 View experiment at: http://localhost:5000/#/experiments/457499682337239906


## 7. View Registered Models

Let's examine the models we've registered.

In [8]:
# Get registered model
model_details = client.get_registered_model(model_name)
print(f"Model: {model_details.name}")
print(f"Created: {model_details.creation_timestamp}")

# Get all versions
versions = client.search_model_versions(f"name='{model_name}'")
print(f"\nFound {len(versions)} model versions:")
for version in versions:
    print(f"Version: {version.version}, Status: {version.status}")

Model: google_stock_predictor
Created: 1739948995314

Found 2 model versions:
Version: 2, Status: READY
Version: 1, Status: READY


## 8. Manage Model Aliases

Now let's assign aliases to our model versions to organize them for different environments.

In [9]:
# Get the latest version number
latest_version = str(max([int(v.version) for v in versions]))

# Assign 'candidate' alias to the latest version
client.set_registered_model_alias(
    name=model_name,
    alias="candidate",
    version=latest_version
)

print(f"Assigned 'candidate' alias to Version {latest_version}")

# Add description to explain why this model was promoted
client.update_model_version(
    name=model_name,
    version=latest_version,
    description="Improved model with deeper trees and lower learning rate. Assigned 'candidate' alias for validation."
)

Assigned 'candidate' alias to Version 2


<ModelVersion: aliases=['candidate'], creation_timestamp=1739949013102, current_stage='None', description=('Improved model with deeper trees and lower learning rate. Assigned '
 "'candidate' alias for validation."), last_updated_timestamp=1739949013507, name='google_stock_predictor', run_id='2a0104b4dd5049dfbc947221feec732a', run_link='', source='file:C:/Users/hohoy/OneDrive/Desktop/sagemaker-ncf-mlflow/tutorial_notebook/stage2/notebooks/../mlruns/google_stock_model_registry/2a0104b4dd5049dfbc947221feec732a/artifacts/xgboost_model', status='READY', status_message=None, tags={}, user_id='', version='2'>

## 9. Add Tags to Models

Let's add tags to our model versions to track their status.

In [10]:
# Add validation status tag to our candidate model
client.set_model_version_tag(
    name=model_name,
    version=latest_version,
    key="validation_status",
    value="pending"
)

# Add purpose tag to the registered model
client.set_registered_model_tag(
    name=model_name,
    key="purpose",
    value="stock_price_prediction"
)

print(f"Added tags to model and version {latest_version}")

Added tags to model and version 2


## 10. Load and Use a Model by Alias

Now let's load our model from the registry by referencing its alias.

In [11]:
# Load the model using the alias
candidate_model = mlflow.xgboost.load_model(f"models:/{model_name}@candidate")

# Make predictions with the candidate model
candidate_predictions = candidate_model.predict(X_test[:5])

# Show predictions
results = pd.DataFrame({
    'Date': test_df['Date'][:5],
    'Actual': y_test[:5],
    'Predicted': candidate_predictions
})

print("Predictions from candidate model:")
results

Predictions from candidate model:


Unnamed: 0,Date,Actual,Predicted
641,2024-01-03 00:00:00+00:00,138.919998,136.569473
642,2024-01-04 00:00:00+00:00,136.389999,135.77243
643,2024-01-05 00:00:00+00:00,135.729996,135.522675
644,2024-01-08 00:00:00+00:00,138.839996,133.18222
645,2024-01-09 00:00:00+00:00,140.949997,135.432846


## 11. Promote Model to Production

After validation, let's promote our model to production by assigning the 'champion' alias.

In [12]:
# First, simulate validation by updating the validation status tag
client.set_model_version_tag(
    name=model_name,
    version=latest_version,
    key="validation_status",
    value="passed"
)

# Assign 'champion' alias to the validated model version
client.set_registered_model_alias(
    name=model_name,
    alias="champion",
    version=latest_version
)

print(f"Assigned 'champion' alias to Version {latest_version}")

# Update description
client.update_model_version(
    name=model_name,
    version=latest_version,
    description="Promoted to champion after validation. Model shows improved RMSE over previous version."
)

Assigned 'champion' alias to Version 2


<ModelVersion: aliases=['candidate', 'champion'], creation_timestamp=1739949013102, current_stage='None', description=('Promoted to champion after validation. Model shows improved RMSE over '
 'previous version.'), last_updated_timestamp=1739949014187, name='google_stock_predictor', run_id='2a0104b4dd5049dfbc947221feec732a', run_link='', source='file:C:/Users/hohoy/OneDrive/Desktop/sagemaker-ncf-mlflow/tutorial_notebook/stage2/notebooks/../mlruns/google_stock_model_registry/2a0104b4dd5049dfbc947221feec732a/artifacts/xgboost_model', status='READY', status_message=None, tags={'validation_status': 'passed'}, user_id='', version='2'>

## 12. View Model Aliases

Let's examine the aliases we've assigned to our model versions.

In [13]:

# Get details of the registered model
model_details = client.get_registered_model(model_name)

# Print all aliases for the registered model
print(f"Aliases for model '{model_name}':")
for alias_name, version_number in model_details.aliases.items():
    print(f"  - {alias_name}: points to version {version_number}")

# Get model version by alias
champion_version = client.get_model_version_by_alias(model_name, "champion")
print(f"\nChampion model: Version {champion_version.version}")
print(f"Creation timestamp: {champion_version.creation_timestamp}")

Aliases for model 'google_stock_predictor':
  - candidate: points to version 2
  - champion: points to version 2

Champion model: Version 2
Creation timestamp: 1739949013102


## 13. Rollback Process

Let's simulate a rollback scenario where we need to revert to a previous model version.

In [14]:
# Get all model versions
all_versions = client.search_model_versions(f"name='{model_name}'")
if len(all_versions) > 1:
    # Find the first version
    previous_version = str(min([int(v.version) for v in all_versions]))
    
    print(f"Simulating rollback from version {latest_version} to version {previous_version}")
    
    # Reassign the champion alias to the previous version 
    client.set_registered_model_alias(
        name=model_name,
        alias="champion",
        version=previous_version
    )
    
    # Add a tag to explain the rollback
    client.set_model_version_tag(
        name=model_name,
        version=previous_version,
        key="rollback_reason",
        value="Performance issues in newer model"
    )
    
    print(f"Rolled back 'champion' alias to version {previous_version}")
else:
    print("Need at least two model versions to demonstrate rollback")

Simulating rollback from version 2 to version 1
Rolled back 'champion' alias to version 1


## 14. Moving Models Between Environments

For multi-environment setups, you can use different registered models with similar model aliases.

In [15]:
# Simulate promoting a model from development to production environment
dev_model_name = "dev_google_stock_predictor"
prod_model_name = "prod_google_stock_predictor"

# Check if the model exists, if not, create it and explain the concept
try:
    client.get_registered_model(prod_model_name)
except:
    # Create the production model (in practice, you'd use the client.create_registered_model() method)
    print(f"In a real setup, we would create {prod_model_name} and copy the version over")
    print("\nConceptual demonstration:")
    print(f"1. Get candidate version from {model_name}")
    print(f"2. Use client.copy_model_version() to copy to {prod_model_name}")
    print(f"3. Assign 'champion' alias in the production environment")
    
    code_example = '''
    # Promote model from development to production (conceptual example)
    candidate_version = client.get_model_version_by_alias("dev_model", "candidate")
    
    # Copy the model version to production registered model
    copied_version = client.copy_model_version(
        src_model_uri=f"models:/dev_model/{candidate_version.version}",
        dst_name="prod_model"
    )
    
    # Assign champion alias in production
    client.set_registered_model_alias(
        name="prod_model",
        alias="champion",
        version=copied_version.version
    )
    '''
    
    print(f"\nExample code:\n{code_example}")

In a real setup, we would create prod_google_stock_predictor and copy the version over

Conceptual demonstration:
1. Get candidate version from google_stock_predictor
2. Use client.copy_model_version() to copy to prod_google_stock_predictor
3. Assign 'champion' alias in the production environment

Example code:

    # Promote model from development to production (conceptual example)
    candidate_version = client.get_model_version_by_alias("dev_model", "candidate")
    
    # Copy the model version to production registered model
    copied_version = client.copy_model_version(
        src_model_uri=f"models:/dev_model/{candidate_version.version}",
        dst_name="prod_model"
    )
    
    # Assign champion alias in production
    client.set_registered_model_alias(
        name="prod_model",
        alias="champion",
        version=copied_version.version
    )
    


## 15. Conclusion

In this lesson, we learned how to:
1. Register models in MLflow's Model Registry
2. Create multiple versions of a model
3. Use aliases to manage model deployments (instead of deprecated stages)
4. Apply tags for tracking model status and metadata
5. Load models using aliases
6. Perform rollbacks by reassigning aliases

These practices help maintain version control, ensure proper testing and validation, and create a clear workflow for deploying models to production without relying on deprecated stages.