# Example Model Experimentation Tracking

This notebook provides an example of using MLflow to track model experimentation, as discussed in [TBD](TBD).

## Requirements

In [1]:
import mlflow
import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler

from apple_data import generate_apple_sales_data_with_promo_adjustment

In [2]:
# Globally suppress Python warnings so the cell outputs below stay readable.
# NOTE(review): the previous comment blamed Great Expectations for the noise, but
# this notebook does not import it -- confirm which library emits the warnings.
import warnings
warnings.simplefilter('ignore')

## Create a new experiment

In [3]:
# Select the experiment that the runs in this notebook are recorded under.
# MLflow creates it when it does not exist yet (see the INFO message in the output).
mlflow.set_experiment(experiment_name="Forecasting Apple Demand")

2025/01/21 12:52:58 INFO mlflow.tracking.fluent: Experiment with name 'Forecasting Apple Demand' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///Users/b294776/Desktop/workspace/training/UC/uc-bana-7075/ModelOps/mlruns/103668900385120254', creation_time=1737481978783, experiment_id='103668900385120254', last_update_time=1737481978783, lifecycle_stage='active', name='Forecasting Apple Demand', tags={}>

### Synthetic data generation

In [4]:
# Build a 1,000-row synthetic apple-sales dataset with the project helper
data = generate_apple_sales_data_with_promo_adjustment(
    n_rows=1_000,
    base_demand=1_000,
)

# Preview the first few rows
data.head()

Unnamed: 0,date,average_temperature,rainfall,weekend,holiday,price_per_kg,promo,demand,previous_days_demand
0,2022-04-28 12:54:28.554577,30.584727,6.786845,0,0,2.502464,1,1045,1045.0
1,2022-04-29 12:54:28.554576,15.465069,9.71652,0,0,1.87118,1,1079,1045.0
2,2022-04-30 12:54:28.554574,10.786525,1.099836,1,0,1.14916,1,1473,1079.0
3,2022-05-01 12:54:28.554573,23.648154,9.578136,1,0,0.891414,0,1269,1473.0
4,2022-05-02 12:54:28.554572,13.861391,4.693826,0,0,0.737711,0,873,1269.0


## Create training data

In [5]:
# The target is the demand column
y = data["demand"]

# The features are every remaining column except the raw date field
X = data.drop(columns=["date", "demand"])

# Hold out 20% of the rows for validation; the fixed seed keeps the split repeatable
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Train regularized regression model

In [6]:
# Learn the scaling statistics from the training split only ...
scaler = StandardScaler().fit(X_train)

# ... then apply that same transformation to both splits
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Record the preprocessing step so it can be logged alongside the model
feature_params = dict(preprocessing="StandardScaler")

In [7]:
# Hyperparameters for the Ridge model, kept in a dict so they can be logged later
model_params = dict(alpha=0.5)

# Instantiate the Ridge regressor with those hyperparameters
reg = linear_model.Ridge(**model_params)

# Fit on the standardized training features
reg.fit(X_train_scaled, y_train)

## Evaluate regularized regression model

In [8]:
# Score the fitted Ridge model on the held-out validation split
y_pred = reg.predict(X_val_scaled)

# Compute the error metrics; RMSE is derived from MSE
mae, mse, r2 = (
    score(y_val, y_pred)
    for score in (mean_absolute_error, mean_squared_error, r2_score)
)
rmse = np.sqrt(mse)

# Gather the metrics into one mapping so they can be logged in a single call
metrics = dict(mae=mae, mse=mse, rmse=rmse, r2=r2)
metrics

{'mae': 45.56899051975028,
 'mse': 3374.0824286383668,
 'rmse': 58.08685245938505,
 'r2': 0.9061581753204919}

## Log regularized regression results & artifacts

In [9]:
run_name = "Regularized Regression"
artifact_path = "artifacts"

# Everything logged inside this context is attached to one MLflow run
with mlflow.start_run(run_name=run_name) as run:

    # Record the training dataset (features plus the demand target)
    # and the preprocessing that was applied
    training_data = pd.concat((X_train, y_train), axis="columns")
    training_input = mlflow.data.from_pandas(training_data, targets='demand')
    mlflow.log_input(training_input, context="training data")
    mlflow.log_params(feature_params)

    # Record the hyperparameters the model was fit with
    mlflow.log_params(model_params)

    # Record the validation error metrics
    mlflow.log_metrics(metrics)

    # Store the fitted model, with the scaled validation features as its input example
    mlflow.sklearn.log_model(
        sk_model=reg,
        artifact_path=artifact_path,
        input_example=X_val_scaled,
    )

## Train random forest model

In [10]:
# The random forest is trained on the unscaled features, since random forest
# models do not require feature standardization
feature_params = dict(preprocessing="None")

In [11]:
# Hyperparameters for the random forest, kept in a dict so they can be logged later
model_params = dict(
    n_estimators=100,
    max_depth=6,
    min_samples_split=10,
    min_samples_leaf=4,
    bootstrap=True,
    oob_score=False,
    random_state=888,
)

# Instantiate the RandomForestRegressor with those hyperparameters
rf = RandomForestRegressor(**model_params)

# Fit on the unscaled training features
rf.fit(X_train, y_train)



## Evaluate random forest model

In [12]:
# Score the fitted random forest on the held-out validation split
y_pred = rf.predict(X_val)

# Compute the error metrics; RMSE is derived from MSE
mae, mse, r2 = (
    score(y_val, y_pred)
    for score in (mean_absolute_error, mean_squared_error, r2_score)
)
rmse = np.sqrt(mse)

# Gather the metrics into one mapping so they can be logged in a single call
metrics = dict(mae=mae, mse=mse, rmse=rmse, r2=r2)
metrics

{'mae': 49.401040191295394,
 'mse': 3951.4336957489095,
 'rmse': 62.86043028606239,
 'r2': 0.8901005663163919}

## Log random forest results & artifacts

In [13]:
run_name = "Random Forest"

# Everything logged inside this context is attached to one MLflow run
with mlflow.start_run(run_name=run_name) as run:

    # Record the training dataset (features plus the demand target)
    # and the preprocessing that was applied
    training_data = pd.concat((X_train, y_train), axis="columns")
    training_input = mlflow.data.from_pandas(training_data, targets='demand')
    mlflow.log_input(training_input, context="training data")
    mlflow.log_params(feature_params)

    # Record the hyperparameters the model was fit with
    mlflow.log_params(model_params)

    # Record the validation error metrics
    mlflow.log_metrics(metrics)

    # Store the fitted model, with the validation features as its input example
    mlflow.sklearn.log_model(
        sk_model=rf,
        artifact_path=artifact_path,
        input_example=X_val,
    )

## Random forest hyperparameter search

In [14]:
# Candidate values for each random forest hyperparameter
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 6, 10, 20],
    min_samples_split=[5, 10, 15],
    min_samples_leaf=[2, 4, 6],
    bootstrap=[True, False],
)

# Exhaustive search over the grid with 5-fold cross-validation
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    verbose=2,
)

# Run the search on the training split
grid_search.fit(X_train, y_train)

# Keep the winning hyperparameters and the estimator built from them
best_params = grid_search.best_params_
best_rf = grid_search.best_estimator_

# Score the best estimator on the held-out validation split
y_pred_best = best_rf.predict(X_val)

# Compute the error metrics; RMSE is derived from MSE
mae_best, mse_best, r2_best = (
    score(y_val, y_pred_best)
    for score in (mean_absolute_error, mean_squared_error, r2_score)
)
rmse_best = np.sqrt(mse_best)

# Gather the metrics into one mapping so they can be logged in a single call
metrics_best = dict(mae=mae_best, mse=mse_best, rmse=rmse_best, r2=r2_best)

# Record the tuned model as its own MLflow run
run_name = "Random Forest Hyperparameter Tuning"

with mlflow.start_run(run_name=run_name) as run:
    # Record the training dataset (features plus the demand target)
    # and the preprocessing that was applied
    training_data = pd.concat((X_train, y_train), axis="columns")
    training_input = mlflow.data.from_pandas(training_data, targets='demand')
    mlflow.log_input(training_input, context="training data")
    mlflow.log_params(feature_params)

    # Record the hyperparameters selected by GridSearchCV
    mlflow.log_params(best_params)

    # Record the validation error metrics
    mlflow.log_metrics(metrics_best)

    # Store the best estimator, with the validation features as its input example
    mlflow.sklearn.log_model(
        sk_model=best_rf,
        artifact_path=artifact_path,
        input_example=X_val,
    )

Fitting 5 folds for each of 216 candidates, totalling 1080 fits
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=2, min_samples_split=5, n_estimators=50; total time=   0.2s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=2, min_samples_split=5, n_estimators=50; total time=   0.3s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=2, min_samples_split=5, n_estimators=50; total time=   0.3s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=2, min_samples_split=5, n_estimators=50; total time=   0.3s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=2, min_samples_split=5, n_estimators=50; total time=   0.3s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=2, min_samples_split=10, n_estimators=50; total time=   0.2s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=2, min_samples_split=10, n_estimators=50; total time=   0.2s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total ti

## Regularization hyperparameter search

In [15]:
# Candidate regularization strengths for Ridge regression
param_grid_ridge = {
    'alpha': [0.1, 0.5, 1.0, 5.0, 10.0]
}

# Exhaustive search over the grid with 5-fold cross-validation
grid_search_ridge = GridSearchCV(estimator=linear_model.Ridge(), param_grid=param_grid_ridge, cv=5, n_jobs=-1, verbose=2)

# Run the search on the standardized training features.
# NOTE: the scaler was fit on all of X_train before the folds are drawn, so each
# validation fold has already influenced the scaling statistics.
grid_search_ridge.fit(X_train_scaled, y_train)

# Keep the winning hyperparameters and the estimator built from them
best_params_ridge = grid_search_ridge.best_params_
best_ridge = grid_search_ridge.best_estimator_

# Score the best estimator on the standardized validation features
y_pred_best_ridge = best_ridge.predict(X_val_scaled)

# Calculate error metrics
mae_best_ridge = mean_absolute_error(y_val, y_pred_best_ridge)
mse_best_ridge = mean_squared_error(y_val, y_pred_best_ridge)
rmse_best_ridge = np.sqrt(mse_best_ridge)
r2_best_ridge = r2_score(y_val, y_pred_best_ridge)

# Assemble the metrics we're going to write into a collection
metrics_best_ridge = {"mae": mae_best_ridge, "mse": mse_best_ridge, "rmse": rmse_best_ridge, "r2": r2_best_ridge}

# This model is trained on StandardScaler output, so record that explicitly.
# The shared `feature_params` variable was last set to {"preprocessing": "None"}
# for the random forest runs; logging it here would mislabel this run.
feature_params_ridge = {"preprocessing": "StandardScaler"}

# Log the experiment results and artifacts with MLflow
run_name = "Regularized Regression Hyperparameter Tuning"

with mlflow.start_run(run_name=run_name) as run:
    # Log dataset and preprocessing details
    training_data = pd.concat([X_train, y_train], axis=1)
    training_input = mlflow.data.from_pandas(training_data, targets='demand')
    mlflow.log_input(training_input, context="training data")
    mlflow.log_params(feature_params_ridge)

    # Log the best parameters found by GridSearchCV
    mlflow.log_params(best_params_ridge)

    # Log the error metrics that were calculated during validation
    mlflow.log_metrics(metrics_best_ridge)

    # Log an instance of the best trained model for later use
    mlflow.sklearn.log_model(sk_model=best_ridge, input_example=X_val_scaled, artifact_path=artifact_path)

Fitting 5 folds for each of 5 candidates, totalling 25 fits
[CV] END ..........................................alpha=0.1; total time=   0.0s
[CV] END ..........................................alpha=0.1; total time=   0.0s
[CV] END ..........................................alpha=0.1; total time=   0.0s
[CV] END ..........................................alpha=0.1; total time=   0.0s
[CV] END ..........................................alpha=0.1; total time=   0.0s
[CV] END ..........................................alpha=0.5; total time=   0.0s
[CV] END ..........................................alpha=0.5; total time=   0.0s
[CV] END ..........................................alpha=0.5; total time=   0.0s
[CV] END ..........................................alpha=0.5; total time=   0.0s
[CV] END ..........................................alpha=0.5; total time=   0.0s
[CV] END ..........................................alpha=1.0; total time=   0.0s
[CV] END ........................................

## Computing Environment

In [16]:
import sys

# Report the interpreter version, followed by a blank line
print(f'Python version: {sys.version}', end='\n\n')

# Echo each line of the requirements file, stripped of surrounding whitespace
with open('modelops-requirements.txt') as requirements_file:
    for requirement in map(str.strip, requirements_file):
        print(requirement)

Python version: 3.12.7 | packaged by Anaconda, Inc. | (main, Oct  4 2024, 08:28:27) [Clang 14.0.6 ]

mlflow==2.12.2
numpy==1.26.4
pandas==2.1.4
scikit-learn==1.5.1
