
# MLflow Demo: Tracking and Registering a Classification Model

This notebook demonstrates how to use **MLflow** to track machine learning experiments, log parameters and metrics, and register a model to the MLflow Model Registry using the **Breast Cancer Wisconsin** dataset.

### What You'll Learn:
- How to log experiments with MLflow
- How to register a model in the MLflow Model Registry
- How to load the model back from the registry and make predictions


In [1]:

import mlflow
import mlflow.sklearn
import mlflow.pyfunc

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

import pandas as pd
import numpy as np


In [2]:

# Set the MLflow tracking URI (local file storage)
import os
mlflow.set_tracking_uri(f"file://{os.path.join(os.getcwd(), 'artifacts')}")

# Set experiment name (creates if not exists)
mlflow.set_experiment("Breast_Cancer_Classification")


2025/06/19 20:31:42 INFO mlflow.tracking.fluent: Experiment with name 'Breast_Cancer_Classification' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///home/vane/dataScience/workshop/artifacts/823996128049102103', creation_time=1750339902669, experiment_id='823996128049102103', last_update_time=1750339902669, lifecycle_stage='active', name='Breast_Cancer_Classification', tags={}>

In [3]:

# Load the dataset
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)


In [4]:

# Start MLflow run and log model, params, metrics
with mlflow.start_run():
    clf = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
    clf.fit(X_train, y_train)

    preds = clf.predict(X_test)
    
    # Evaluation metrics
    acc = accuracy_score(y_test, preds)
    prec = precision_score(y_test, preds)
    rec = recall_score(y_test, preds)
    f1 = f1_score(y_test, preds)
    
    # Log parameters and metrics
    mlflow.log_param("n_estimators", 100)
    mlflow.log_param("max_depth", 5)
    mlflow.log_metric("accuracy", acc)
    mlflow.log_metric("precision", prec)
    mlflow.log_metric("recall", rec)
    mlflow.log_metric("f1_score", f1)
    
    # Log and register the model
    mlflow.sklearn.log_model(clf, "model", registered_model_name="BreastCancerRFModel")

    print(f"Accuracy: {acc:.2f}, Precision: {prec:.2f}, Recall: {rec:.2f}, F1: {f1:.2f}")




Accuracy: 0.97, Precision: 0.97, Recall: 0.98, F1: 0.97


Successfully registered model 'BreastCancerRFModel'.
Created version '1' of model 'BreastCancerRFModel'.


In [5]:

# Load the model back from the MLflow Model Registry (Production stage)
model_uri = "models:/BreastCancerRFModel/Production"

# Load model as pyfunc
loaded_model = mlflow.pyfunc.load_model(model_uri)

# Predict on sample input
sample_input = X_test.iloc[:5]
predictions = loaded_model.predict(sample_input)

print("Sample Input Predictions:")
print(predictions)


MlflowException: No versions of model with name 'BreastCancerRFModel' and stage 'Production' found