In [6]:
# Simple MLflow Model Registration - Notebook Ready
import mlflow
import mlflow.sklearn
import mlflow.pyfunc
from mlflow.tracking import MlflowClient

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import root_mean_squared_error
import pandas as pd
import numpy as np
from datetime import datetime

# =============================================================================
# CONFIGURATION - UPDATE THESE VALUES
# =============================================================================
MLFLOW_TRACKING_URI = "https://mlflow-server-571828190906.us-west1.run.app"  # Replace with your MLflow server
MODEL_NAME = "MyModel"  
EXPERIMENT_NAME = "model-serving-experiment"

# Set up MLflow
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)

print(f"🔧 MLflow URI: {MLFLOW_TRACKING_URI}")
print(f"🔧 Model Name: {MODEL_NAME}")

# =============================================================================
# STEP 1: TRAIN AND REGISTER MODEL
# =============================================================================

def quick_model_registration():
    """Quick model training and registration"""
    
    with mlflow.start_run(run_name=f"quick_registration_{datetime.now().strftime('%H%M%S')}"):
        
        # 1. Loading the supplied data in the /data directory
        data = pd.read_csv('data/us_house_Sales_data.csv')

        # 1.5 Prepping the data for training
        data['Price'] = data['Price'].replace(r'[\$,]','',regex=True).astype(int)
        data.drop(columns=['Address', 'MLS ID', 'Listing URL'], axis=1, inplace=True)

        label_encoders = {}
        for col in ['City', 'State', 'Property Type', 'Listing Agent', 'Status']:
            le = LabelEncoder()
            data[col] = le.fit_transform(data[col])
            label_encoders[col] = le # Stores this in the label_encoders dict earlier

        data['Bedrooms'] = data['Bedrooms'].str.extract(r'(\d+)').astype(int)
        data['Bathrooms'] = data['Bathrooms'].str.extract(r'(\d+)').astype(int)

        for col in ['Area (Sqft)', 'Lot Size']:
            data[col] = data[col].str.replace('sqft', '').str.replace(',','').str.extract(r'(\d+)').astype(int)

        # Encoding zipcode based on the target variable 'Price', which will lead to data leakage, but oh well
        data['Zipcode_encoded'] = data.groupby('Zipcode')['Price'].transform('mean')
        data.drop(columns=['Zipcode'], axis = 1, inplace=True)

        # 2. Train model (replace with your model)
        X = data.drop(columns = ['Price'], axis = 1)
        y = data['Price']

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)

        model = RandomForestRegressor(n_estimators=100, random_state= 42, n_jobs=-1)
        model.fit(X_train, y_train)
        
        # 3. Evaluate
        y_pred = model.predict(X_test)
        rmse = root_mean_squared_error(y_test, y_pred)
        print(f"RMSE: {rmse:.4f}")
        
        # 4. Log parameters and metrics
        mlflow.log_param("model_type", "RandomForestRegressor")
        mlflow.log_param("n_estimators", 100)
        mlflow.log_metric("rmse", rmse)
        
        # 5. Register model
        print(f"Registering model as '{MODEL_NAME}'...")
        mlflow.sklearn.log_model(
            sk_model=model,
            artifact_path="model",
            registered_model_name=MODEL_NAME
        )
        
        run_id = mlflow.active_run().info.run_id
        print(f"✅ Model registered! Run ID: {run_id}")
        
        return run_id, model

# =============================================================================
# STEP 2: PROMOTE TO PRODUCTION
# =============================================================================

def promote_to_production(model_name, run_id=None):
    """Promote latest model version to Production"""
    
    client = MlflowClient()
    
    # Get the latest version
    try:
        if run_id:
            versions = client.search_model_versions(f"run_id='{run_id}'")
            version = versions[0].version
        else:
            latest_versions = client.get_latest_versions(model_name, stages=["None"])
            version = latest_versions[0].version
        
        print(f"Found version: {version}")
        
        # Transition to Production
        client.transition_model_version_stage(
            name=model_name,
            version=version,
            stage="Production"
        )
        
        print(f"Version {version} promoted to Production!")
        return version
        
    except Exception as e:
        print(f"Error promoting model: {e}")
        return None

# =============================================================================
# STEP 3: TEST LOADING
# =============================================================================

def test_model_loading(model_name, stage="Production"):
    """Test loading the registered model"""
    
    model_uri = f"models:/{model_name}/{stage}"
    print(f"Testing model loading from: {model_uri}")
    
    try:
        # Load model
        loaded_model = mlflow.pyfunc.load_model(model_uri)
        print("✅ Model loaded successfully!")
        
        # Test prediction with sample data
        test_data = np.random.rand(5, 12)  # 5 samples, 20 features
        predictions = loaded_model.predict(test_data)
        print(f"🔮 Sample predictions: {predictions}")
        
        return True
        
    except Exception as e:
        print(f"❌ Model loading failed: {e}")
        return False

# =============================================================================
# EXECUTION - RUN THIS SECTION
# =============================================================================

print("🚀 Starting model registration...")

# Step 1: Train and register
run_id, trained_model = quick_model_registration()

# Step 2: Promote to production
version = promote_to_production(MODEL_NAME, run_id)

# Step 3: Test loading
if version:
    success = test_model_loading(MODEL_NAME, "Production")
    
    if success:
        print(f"\n🎉 SUCCESS! Your model is ready for Cloud Run!")
        print(f"📋 Use this in your Cloud Run app:")
        print(f"    model = mlflow.pyfunc.load_model('models:/{MODEL_NAME}/Production')")
    else:
        print(f"\n⚠️ Model registered but loading failed. Check your MLflow server.")
else:
    print(f"\n❌ Failed to promote model to production.")

print(f"\n📊 MLflow UI: {MLFLOW_TRACKING_URI}")

🔧 MLflow URI: https://mlflow-server-571828190906.us-west1.run.app
🔧 Model Name: MyModel
🚀 Starting model registration...
RMSE: 91705.6442




Registering model as 'MyModel'...


Registered model 'MyModel' already exists. Creating a new version of this model...
2025/07/23 00:23:24 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: MyModel, version 3
Created version '3' of model 'MyModel'.


✅ Model registered! Run ID: 64637108629641e594e184bbd6c39a53
🏃 View run quick_registration_002315 at: https://mlflow-server-571828190906.us-west1.run.app/#/experiments/1/runs/64637108629641e594e184bbd6c39a53
🧪 View experiment at: https://mlflow-server-571828190906.us-west1.run.app/#/experiments/1
Found version: 3
Version 3 promoted to Production!
Testing model loading from: models:/MyModel/Production


  client.transition_model_version_stage(
Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00,  9.11it/s]

✅ Model loaded successfully!
🔮 Sample predictions: [101847.84 101924.73 101973.04 102158.57 102133.29]

🎉 SUCCESS! Your model is ready for Cloud Run!
📋 Use this in your Cloud Run app:
    model = mlflow.pyfunc.load_model('models:/MyModel/Production')

📊 MLflow UI: https://mlflow-server-571828190906.us-west1.run.app





In [11]:
# 1. Loading the supplied data in the /data directory
data = pd.read_csv('data/us_house_Sales_data.csv')

# 1.5 Prepping the data for training
data['Price'] = data['Price'].replace(r'[\$,]','',regex=True).astype(int)
data.drop(columns=['Address', 'MLS ID', 'Listing URL'], axis=1, inplace=True)

label_encoders = {}
for col in ['City', 'State', 'Property Type', 'Listing Agent', 'Status']:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le # Stores this in the label_encoders dict earlier

data['Bedrooms'] = data['Bedrooms'].str.extract(r'(\d+)').astype(int)
data['Bathrooms'] = data['Bathrooms'].str.extract(r'(\d+)').astype(int)

for col in ['Area (Sqft)', 'Lot Size']:
    data[col] = data[col].str.replace('sqft', '').str.replace(',','').str.extract(r'(\d+)').astype(int)

# Encoding zipcode based on the target variable 'Price', which will lead to data leakage, but oh well
data['Zipcode_encoded'] = data.groupby('Zipcode')['Price'].transform('mean')
data.drop(columns=['Zipcode'], axis = 1, inplace=True)

# 2. Train model (replace with your model)
X = data.drop(columns = ['Price'], axis = 1)
y = data['Price']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)

In [23]:
import mlflow.pyfunc

MLFLOW_TRACKING_URI = "https://mlflow-server-571828190906.us-west1.run.app"
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

# Model configuration - Updated to use Model Registry
MODEL_NAME = "MyModel"
MODEL_STAGE = "Production"  # or "Staging"
MODEL_URI = f"models:/{MODEL_NAME}/{MODEL_STAGE}"

model = mlflow.pyfunc.load_model(MODEL_URI)
model_version = mlflow.MlflowClient().get_model_version(MODEL_NAME, "3")  # Get latest version
print(model)

Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00,  8.08it/s]

mlflow.pyfunc.loaded_model:
  artifact_path: gs://jaguar-bucket-deployml-2025-id14/1/models/m-b292d89845364e979459e65093fa74eb/artifacts
  flavor: mlflow.sklearn
  run_id: 64637108629641e594e184bbd6c39a53






In [27]:
model.predict(X_train)

array([1449218.29,  601042.65, 1088662.58, ...,  695706.88,  951554.41,
       1285890.59], shape=(2400,))

In [None]:
# This code snippet is demonstrating how to register an existing trained model using MLflow and promote it to production. Here's a breakdown of what each part of the code is doing:
# Quick registration of an existing trained model
import mlflow
import mlflow.sklearn

# Assuming you already have a trained model called 'your_model'
mlflow.set_tracking_uri("https://mlflow-server-571828190906.us-west1.run.app")

# Train your model first
test_data = np.random.rand(5, 12)  # 5 samples, 20 features
X_train.loc[1697]
predictions = model.predict(test_data)
print(f"🔮 Sample predictions: {predictions}")

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# This creates your_model
your_model = RandomForestClassifier(n_estimators=100, random_state=42)
your_model.fit(X_train, y_train)  # Now your_model is trained


with mlflow.start_run():
    mlflow.sklearn.log_model(
        sk_model=your_model,  # Your existing trained model
        artifact_path="model",
        registered_model_name="MyModel"
    )
    
    # Promote to production
    from mlflow.tracking import MlflowClient
    client = MlflowClient()
    latest_version = client.get_latest_versions("MyModel", stages=["None"])[0]
    client.transition_model_version_stage(
        name="MyModel",
        version=latest_version.version,
        stage="Production"
    )

print("✅ Model registered and promoted to Production!")

City                    1.0
State                   0.0
Bedrooms                6.0
Bathrooms               2.0
Area (Sqft)          2299.0
Lot Size             1931.0
Year Built           1987.0
Days on Market         67.0
Property Type           0.0
Listing Agent           3.0
Status                  2.0
Zipcode_encoded    262341.0
Name: 1697, dtype: float64

In [19]:
X_train.index

Index([ 642,  700,  226, 1697, 1010, 1572,  776, 1730, 2826, 2228,
       ...
       1482,  330, 1238,  466, 2169, 1638, 1095, 1130, 1294,  860],
      dtype='int64', length=2400)