# Model Registration with Feast-Compatible Schema

This notebook registers a housing price prediction model using the cleaned data with proper snake_case feature names that align with the Feast feature store schema.

In [1]:
# MLflow Model Registration - Feast Compatible
import mlflow
import mlflow.sklearn
import mlflow.pyfunc
from mlflow.tracking import MlflowClient
from mlflow.models import infer_signature

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import root_mean_squared_error, mean_absolute_error, r2_score
import pandas as pd
import numpy as np
from datetime import datetime

# =============================================================================
# CONFIGURATION - UPDATE THESE VALUES
# =============================================================================
MLFLOW_TRACKING_URI = "https://mlflow-server-555196125082.us-west1.run.app"  # Replace with your MLflow server
MODEL_NAME = "HousingModel"  
EXPERIMENT_NAME = "housing-feast-compatible"

# Set up MLflow
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
# Create experiment if it doesn't exist
try:
    mlflow.set_experiment(EXPERIMENT_NAME)
except:
    mlflow.create_experiment(EXPERIMENT_NAME)
    mlflow.set_experiment(EXPERIMENT_NAME)

print(f"MLflow URI: {MLFLOW_TRACKING_URI}")
print(f"Model Name: {MODEL_NAME}")
print(f"Experiment: {EXPERIMENT_NAME}")

2025/08/14 07:56:48 INFO mlflow.tracking.fluent: Experiment with name 'housing-feast-compatible' does not exist. Creating a new experiment.


MLflow URI: https://mlflow-server-555196125082.us-west1.run.app
Model Name: HousingModel
Experiment: housing-feast-compatible


In [2]:
# =============================================================================
# STEP 1: LOAD AND EXAMINE CLEANED DATA
# =============================================================================

# Load the cleaned parquet file with proper schema
data_path = 'feast-container-housing-postgres-v2/data/house_data_cleaned.parquet'
data = pd.read_parquet(data_path)

print(f"📊 Dataset shape: {data.shape}")
print(f"📋 Columns: {data.columns.tolist()}")
print(f"\n🔍 Data types:")
print(data.dtypes)
print(f"\n📈 First 5 rows:")
data.head()

📊 Dataset shape: (3000, 15)
📋 Columns: ['price', 'city', 'state', 'bedrooms', 'bathrooms', 'area_sqft', 'lot_size', 'year_built', 'days_on_market', 'property_type', 'listing_agent', 'status', 'zipcode_encoded', 'mls_id', 'event_timestamp']

🔍 Data types:
price                            int64
city                             int64
state                            int64
bedrooms                         int64
bathrooms                        int64
area_sqft                        int64
lot_size                         int64
year_built                       int64
days_on_market                   int64
property_type                    int64
listing_agent                    int64
status                           int64
zipcode_encoded                float64
mls_id                           int64
event_timestamp    datetime64[ns, UTC]
dtype: object

📈 First 5 rows:


Unnamed: 0,price,city,state,bedrooms,bathrooms,area_sqft,lot_size,year_built,days_on_market,property_type,listing_agent,status,zipcode_encoded,mls_id,event_timestamp
0,554217,3,0,1,3,772,4757,1959,101,4,0,0,554217.0,104635,2000-01-01 00:00:00+00:00
1,164454,0,2,1,1,2348,3615,1969,46,0,1,2,164454.0,535721,2000-01-02 00:00:00+00:00
2,1249331,2,4,6,1,3630,9369,1990,59,4,4,0,1249331.0,900458,2000-01-03 00:00:00+00:00
3,189267,0,4,2,1,605,8804,1958,119,0,3,1,189267.0,318589,2000-01-04 00:00:00+00:00
4,465778,1,0,3,2,1711,9260,2020,26,4,3,1,465778.0,899716,2000-01-05 00:00:00+00:00


In [3]:
# =============================================================================
# STEP 2: PREPARE FEATURES FOR TRAINING
# =============================================================================

# Define feature columns (excluding target 'price' and metadata columns)
feature_columns = [
    'city', 'state', 'bedrooms', 'bathrooms', 'area_sqft', 
    'lot_size', 'year_built', 'days_on_market', 'property_type', 
    'listing_agent', 'status', 'zipcode_encoded'
]

# Prepare features and target
X = data[feature_columns].copy()
y = data['price'].copy()

print(f"🎯 Target variable: price")
print(f"📊 Features ({len(feature_columns)}): {feature_columns}")
print(f"\n📈 Target statistics:")
print(f"  Mean: ${y.mean():,.2f}")
print(f"  Std:  ${y.std():,.2f}")
print(f"  Min:  ${y.min():,.2f}")
print(f"  Max:  ${y.max():,.2f}")

# Check for missing values
print(f"\n🔍 Missing values in features:")
missing = X.isnull().sum()
print(missing[missing > 0] if missing.sum() > 0 else "None")

# Split the data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=None
)

print(f"\n📊 Data split:")
print(f"  Training samples: {X_train.shape[0]}")
print(f"  Testing samples:  {X_test.shape[0]}")

🎯 Target variable: price
📊 Features (12): ['city', 'state', 'bedrooms', 'bathrooms', 'area_sqft', 'lot_size', 'year_built', 'days_on_market', 'property_type', 'listing_agent', 'status', 'zipcode_encoded']

📈 Target statistics:
  Mean: $810,859.36
  Std:  $399,732.02
  Min:  $100,283.00
  Max:  $1,499,473.00

🔍 Missing values in features:
None

📊 Data split:
  Training samples: 2400
  Testing samples:  600


In [4]:
# =============================================================================
# STEP 3: TRAIN AND REGISTER MODEL WITH FEAST COMPATIBILITY
# =============================================================================

def train_and_register_model():
    """Train model with proper feature schema for Feast compatibility"""
    
    with mlflow.start_run(run_name=f"feast_compatible_model_{datetime.now().strftime('%Y%m%d_%H%M%S')}"):
        
        # 1. Train Random Forest model
        print("🤖 Training RandomForestRegressor...")
        model = RandomForestRegressor(
            n_estimators=200,
            max_depth=15,
            min_samples_split=5,
            min_samples_leaf=2,
            random_state=42,
            n_jobs=-1
        )
        
        model.fit(X_train, y_train)
        
        # 2. Make predictions
        y_pred_train = model.predict(X_train)
        y_pred_test = model.predict(X_test)
        
        # 3. Calculate metrics
        train_rmse = root_mean_squared_error(y_train, y_pred_train)
        test_rmse = root_mean_squared_error(y_test, y_pred_test)
        train_mae = mean_absolute_error(y_train, y_pred_train)
        test_mae = mean_absolute_error(y_test, y_pred_test)
        train_r2 = r2_score(y_train, y_pred_train)
        test_r2 = r2_score(y_test, y_pred_test)
        
        print(f"\n📊 Model Performance:")
        print(f"  Training RMSE: ${train_rmse:,.2f}")
        print(f"  Testing RMSE:  ${test_rmse:,.2f}")
        print(f"  Training MAE:  ${train_mae:,.2f}")
        print(f"  Testing MAE:   ${test_mae:,.2f}")
        print(f"  Training R²:   {train_r2:.4f}")
        print(f"  Testing R²:    {test_r2:.4f}")
        
        # 4. Log parameters
        mlflow.log_param("model_type", "RandomForestRegressor")
        mlflow.log_param("n_estimators", 200)
        mlflow.log_param("max_depth", 15)
        mlflow.log_param("min_samples_split", 5)
        mlflow.log_param("min_samples_leaf", 2)
        mlflow.log_param("features", feature_columns)
        mlflow.log_param("feature_count", len(feature_columns))
        mlflow.log_param("data_source", "house_data_cleaned.parquet")
        mlflow.log_param("feast_compatible", True)
        
        # 5. Log metrics
        mlflow.log_metric("train_rmse", train_rmse)
        mlflow.log_metric("test_rmse", test_rmse)
        mlflow.log_metric("train_mae", train_mae)
        mlflow.log_metric("test_mae", test_mae)
        mlflow.log_metric("train_r2", train_r2)
        mlflow.log_metric("test_r2", test_r2)
        
        # 6. Create model signature for input validation
        signature = infer_signature(X_train, y_pred_train)
        
        # 7. Log and register model with signature and input example
        print(f"\n🔄 Registering model as '{MODEL_NAME}'...")
        mlflow.sklearn.log_model(
            sk_model=model,
            artifact_path="model",
            registered_model_name=MODEL_NAME,
            signature=signature,
            input_example=X_train.head(3)
        )
        
        run_id = mlflow.active_run().info.run_id
        print(f"✅ Model registered! Run ID: {run_id}")
        
        # 8. Log feature importance
        feature_importance = pd.DataFrame({
            'feature': feature_columns,
            'importance': model.feature_importances_
        }).sort_values('importance', ascending=False)
        
        print(f"\n🎯 Top 5 Most Important Features:")
        for idx, row in feature_importance.head().iterrows():
            print(f"  {row['feature']}: {row['importance']:.4f}")
        
        # Log feature importance as artifact
        feature_importance.to_csv("feature_importance.csv", index=False)
        mlflow.log_artifact("feature_importance.csv")
        
        return run_id, model, feature_importance

# Train and register the model
run_id, trained_model, feature_importance = train_and_register_model()

🤖 Training RandomForestRegressor...

📊 Model Performance:
  Training RMSE: $37,752.81
  Testing RMSE:  $80,963.55
  Training MAE:  $9,412.76
  Testing MAE:   $18,566.29
  Training R²:   0.9910
  Testing R²:    0.9608





🔄 Registering model as 'HousingModel'...


Successfully registered model 'HousingModel'.
2025/08/14 07:57:21 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: HousingModel, version 1
Created version '1' of model 'HousingModel'.


✅ Model registered! Run ID: a1d53c7c55404007917305ea253aec2b

🎯 Top 5 Most Important Features:
  zipcode_encoded: 0.9937
  days_on_market: 0.0013
  area_sqft: 0.0010
  lot_size: 0.0009
  year_built: 0.0009
🏃 View run feast_compatible_model_20250814_075705 at: https://mlflow-server-555196125082.us-west1.run.app/#/experiments/1/runs/a1d53c7c55404007917305ea253aec2b
🧪 View experiment at: https://mlflow-server-555196125082.us-west1.run.app/#/experiments/1


In [5]:
# =============================================================================
# STEP 4: PROMOTE TO PRODUCTION
# =============================================================================

def promote_to_production(model_name, run_id=None):
    """Promote latest model version to Production"""
    
    client = MlflowClient()
    
    try:
        if run_id:
            versions = client.search_model_versions(f"run_id='{run_id}'")
            version = versions[0].version
        else:
            latest_versions = client.get_latest_versions(model_name, stages=["None"])
            version = latest_versions[0].version
        
        print(f"🔄 Found model version: {version}")
        
        # Transition to Production
        client.transition_model_version_stage(
            name=model_name,
            version=version,
            stage="Production"
        )
        
        print(f"✅ Version {version} promoted to Production!")
        return version
        
    except Exception as e:
        print(f"❌ Error promoting model: {e}")
        return None

# Promote model to production
production_version = promote_to_production(MODEL_NAME, run_id)

🔄 Found model version: 1


  client.transition_model_version_stage(


✅ Version 1 promoted to Production!


In [30]:
# =============================================================================
# STEP 5: TEST MODEL LOADING AND FEAST COMPATIBILITY
# =============================================================================

def test_feast_compatible_model(model_name, stage="Production"):
    """Test loading the registered model with Feast-compatible features"""
    
    model_uri = f"models:/{model_name}/{stage}"
    print(f"🔄 Testing model loading from: {model_uri}")
    
    try:
        # Load model
        loaded_model = mlflow.pyfunc.load_model(model_uri)
        print("✅ Model loaded successfully!")
        
        # Test prediction with actual feature format that Feast will provide
        print("\n🧪 Testing with Feast-compatible feature format...")
        
        # Create sample data in the exact format Feast will provide
        sample_features = pd.DataFrame({
            'city': [1, 2, 0],
            'state': [0, 1, 2], 
            'bedrooms': [3, 4, 2],
            'bathrooms': [2, 3, 1],
            'area_sqft': [1500, 2200, 800],
            'lot_size': [5000, 7500, 3000],
            'year_built': [2000, 1995, 2010],
            'days_on_market': [45, 30, 90],
            'property_type': [0, 1, 0],
            'listing_agent': [1, 2, 3],
            'status': [1, 0, 2],
            'zipcode_encoded': [450000.0, 380000.0, 520000.0]
        })
        
        print(f"📊 Sample features:")
        print(sample_features)
        
        # Make predictions
        predictions = loaded_model.predict(sample_features)
        print(f"\n🔮 Predictions: {predictions}")
        print(f"💰 Formatted predictions:")
        for i, pred in enumerate(predictions):
            print(f"  Sample {i+1}: ${pred:,.2f}")
        
        # Test with a single sample (how FastAPI will call it)
        single_sample = sample_features.iloc[[0]]
        single_prediction = loaded_model.predict(single_sample)
        print(f"\n🎯 Single sample prediction: ${single_prediction[0]:,.2f}")
        
        return True, loaded_model
        
    except Exception as e:
        print(f"❌ Model loading/prediction failed: {e}")
        return False, None

# Test the model
if production_version:
    success, loaded_model = test_feast_compatible_model(MODEL_NAME, "Production")
else:
    print("⚠️ Skipping model testing due to promotion failure")

🔄 Testing model loading from: models:/HousingModel/Production


Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 13.84it/s]


✅ Model loaded successfully!

🧪 Testing with Feast-compatible feature format...
📊 Sample features:
   city  state  bedrooms  bathrooms  area_sqft  lot_size  year_built  \
0     1      0         3          2       1500      5000        2000   
1     2      1         4          3       2200      7500        1995   
2     0      2         2          1        800      3000        2010   

   days_on_market  property_type  listing_agent  status  zipcode_encoded  
0              45              0              1       1         450000.0  
1              30              1              2       0         380000.0  
2              90              0              3       2         520000.0  

🔮 Predictions: [450390.13109885 379754.9567381  526422.78112915]
💰 Formatted predictions:
  Sample 1: $450,390.13
  Sample 2: $379,754.96
  Sample 3: $526,422.78

🎯 Single sample prediction: $450,390.13


In [None]:
# =============================================================================
# STEP 7: FEAST FASTAPI SERVICE INTEGRATION
# =============================================================================

import requests
import json

# Feast FastAPI service configuration
FEAST_API_URL = "https://feast-server-555196125082.us-west1.run.app"  # Update with your Feast service URL
# For production: "https://feast-service-url.com"

print("🔧 Feast FastAPI Service Configuration:")
print(f"  Service URL: {FEAST_API_URL}")
print(f"  Health Check: {FEAST_API_URL}/health")
print(f"  Feature Retrieval: {FEAST_API_URL}/get-online-features")

def check_feast_service_health():
    """Check if Feast service is running"""
    try:
        response = requests.get(f"{FEAST_API_URL}/health", timeout=5)
        if response.status_code == 200:
            print("✅ Feast service is healthy!")
            return True
        else:
            print(f"⚠️ Feast service returned status: {response.status_code}")
            return False
    except requests.exceptions.RequestException as e:
        print(f"❌ Cannot connect to Feast service: {e}")
        print("📝 Note: Make sure Feast container is running on the specified URL")
        return False



# Check service health and get available features
feast_healthy = check_feast_service_health()


🔧 Feast FastAPI Service Configuration:
  Service URL: https://feast-server-571828190906.us-west1.run.app
  Health Check: https://feast-server-571828190906.us-west1.run.app/health
  Feature Retrieval: https://feast-server-571828190906.us-west1.run.app/get-online-features
✅ Feast service is healthy!


In [32]:
# =============================================================================
# STEP 8: ONLINE FEATURE RETRIEVAL VIA FEAST API
# =============================================================================

def get_features_from_feast_api(mls_ids):
    """Retrieve features from Feast FastAPI service"""
    
    print(f"🔄 Retrieving features via Feast API for MLS IDs: {mls_ids}")
    
    # Prepare the request payload for Feast API
    request_payload = {
        "features": [
            "housing_features:city",
            "housing_features:state", 
            "housing_features:bedrooms",
            "housing_features:bathrooms",
            "housing_features:area_sqft",
            "housing_features:lot_size",
            "housing_features:year_built",
            "housing_features:days_on_market",
            "housing_features:property_type",
            "housing_features:listing_agent",
            "housing_features:status",
            "housing_features:zipcode_encoded"
        ],
        "entities": {
            "mls_id": mls_ids
        }
    }
    
    try:
        # Make API call to Feast service
        response = requests.post(
            f"{FEAST_API_URL}/get-online-features",
            json=request_payload,
            headers={"Content-Type": "application/json"}
        )
        
        if response.status_code == 200:
            # Parse the response
            feast_response = response.json()
            
            
            print(feast_response)
            
            # Extract features from Feast response
            # Feast typically returns features in a specific format
            values_only = [entry["values"] for entry in feast_response["results"]]
            print(values_only)

            rows = list(map(list, zip(*values_only)))

            column_names = feast_response["metadata"]["feature_names"]

            df = pd.DataFrame(rows, columns=column_names)

            
            return df            
        else:
            print(f"❌ Feast API request failed with status {response.status_code}")
            print(f"Response: {response.text}")
            return None
            
    except requests.exceptions.RequestException as e:
        print(f"❌ Error calling Feast API: {e}")
        return None



def retrieve_features_with_fallback(mls_ids):
    """Try real Feast API first, fallback to simulation if needed"""
    
    if feast_healthy:
        # Try real Feast API
        features = get_features_from_feast_api(mls_ids)
        if features is not None:
            return features, "feast_api"
    

# Test feature retrieval with sample MLS IDs
sample_mls_ids = [104635, 535721, 900458]
retrieved_features, source = retrieve_features_with_fallback(sample_mls_ids)
print(f"\\n📊 Features retrieved via: {source}")
print(retrieved_features)

🔄 Retrieving features via Feast API for MLS IDs: [104635, 535721, 900458]
{'metadata': {'feature_names': ['mls_id', 'property_type', 'city', 'year_built', 'status', 'bathrooms', 'area_sqft', 'listing_agent', 'lot_size', 'days_on_market', 'state', 'zipcode_encoded', 'bedrooms']}, 'results': [{'values': [104635, 535721, 900458], 'statuses': ['PRESENT', 'PRESENT', 'PRESENT'], 'event_timestamps': ['1970-01-01T00:00:00Z', '1970-01-01T00:00:00Z', '1970-01-01T00:00:00Z']}, {'values': [4, 0, 4], 'statuses': ['PRESENT', 'PRESENT', 'PRESENT'], 'event_timestamps': ['2000-01-01T00:00:00Z', '2000-01-02T00:00:00Z', '2000-01-03T00:00:00Z']}, {'values': [3, 0, 2], 'statuses': ['PRESENT', 'PRESENT', 'PRESENT'], 'event_timestamps': ['2000-01-01T00:00:00Z', '2000-01-02T00:00:00Z', '2000-01-03T00:00:00Z']}, {'values': [1959, 1969, 1990], 'statuses': ['PRESENT', 'PRESENT', 'PRESENT'], 'event_timestamps': ['2000-01-01T00:00:00Z', '2000-01-02T00:00:00Z', '2000-01-03T00:00:00Z']}, {'values': [0, 2, 0], 'statu

In [24]:
# =============================================================================
# STEP 9: END-TO-END SCORING WITH FEAST API
# =============================================================================

def end_to_end_scoring_with_feast_api(mls_ids, model_name="HousingModel", stage="Production"):
    """Complete online scoring workflow using Feast FastAPI service"""
    
    print(f"🚀 End-to-End Scoring with Feast API")
    print(f"📋 Input: MLS IDs {mls_ids}")
    print(f"🤖 Model: {model_name} ({stage})")
    print(f"🌐 Feast API: {FEAST_API_URL}")
    print("=" * 60)
    
    # Step 1: Retrieve features from Feast API
    print("\\n🔍 STEP 1: Feature Retrieval from Feast API")
    features, source = retrieve_features_with_fallback(mls_ids)
    
    if features is None:
        print("❌ Feature retrieval failed")
        return None
    
    print(f"✅ Features retrieved via: {source}")
    
    # Step 2: Load production model from MLflow
    print("\\n🤖 STEP 2: Loading Production Model from MLflow")
    try:
        model_uri = f"models:/{model_name}/{stage}"
        loaded_model = mlflow.pyfunc.load_model(model_uri)
        print(f"✅ Model loaded from: {model_uri}")
    except Exception as e:
        print(f"❌ Failed to load model: {e}")
        return None
    
    # Step 3: Make predictions
    print("\\n🔮 STEP 3: Making Predictions")
    try:
        predictions = loaded_model.predict(features)
        print(f"✅ Generated {len(predictions)} predictions")
        
        # Step 4: Format results for API response
        print("\\n💰 STEP 4: Formatting Results")
        results = []
        for i, (mls_id, prediction) in enumerate(zip(mls_ids, predictions)):
            result = {
                'mls_id': int(mls_id),
                'predicted_price': float(prediction),
                'formatted_price': f"${prediction:,.2f}",
                'features_source': source
            }
            results.append(result)
            print(f"  🏠 MLS {mls_id}: {result['formatted_price']}")
        
        # Step 5: Create complete API response
        print("\\n📦 STEP 5: Complete API Response")
        api_response = {
            'success': True,
            'predictions': results,
            'model_info': {
                'model_name': model_name,
                'model_stage': stage,
                'model_uri': model_uri,
                'mlflow_tracking_uri': MLFLOW_TRACKING_URI
            },
            'feast_info': {
                'feast_api_url': FEAST_API_URL,
                'features_source': source,
                'feature_count': len(features.columns)
            },
            'metadata': {
                'timestamp': datetime.now().isoformat(),
                'request_id': f"req_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
                'prediction_count': len(predictions)
            }
        }
        
        print("📋 API Response Summary:")
        print(f"  ✅ Success: {api_response['success']}")
        print(f"  📊 Predictions: {len(api_response['predictions'])}")
        print(f"  🎯 Model: {api_response['model_info']['model_name']}")
        print(f"  🌐 Features: {api_response['feast_info']['features_source']}")
        
        return api_response
        
    except Exception as e:
        print(f"❌ Prediction failed: {e}")
        return {
            'success': False,
            'error': str(e),
            'metadata': {
                'timestamp': datetime.now().isoformat()
            }
        }

# Test the complete workflow
print("🎯 Testing Complete Online Scoring Pipeline\\n")
scoring_result = end_to_end_scoring_with_feast_api([104635, 535721, 900458])

if scoring_result and scoring_result.get('success'):
    print("🎉 SUCCESS! Complete scoring pipeline working!")
    print("📋 Sample API Response JSON:")
    print(scoring_result)
    print(json.dumps(scoring_result, indent=2)[:1000] + "..." if len(json.dumps(scoring_result, indent=2)) > 1000 else json.dumps(scoring_result, indent=2))
else:
    print("❌ Scoring pipeline failed")

🎯 Testing Complete Online Scoring Pipeline\n
🚀 End-to-End Scoring with Feast API
📋 Input: MLS IDs [104635, 535721, 900458]
🤖 Model: HousingModel (Production)
🌐 Feast API: https://feast-server-571828190906.us-west1.run.app
\n🔍 STEP 1: Feature Retrieval from Feast API
🔄 Retrieving features via Feast API for MLS IDs: [104635, 535721, 900458]
{'metadata': {'feature_names': ['mls_id', 'listing_agent', 'lot_size', 'bedrooms', 'state', 'city', 'status', 'property_type', 'days_on_market', 'area_sqft', 'bathrooms', 'zipcode_encoded', 'year_built']}, 'results': [{'values': [104635, 535721, 900458], 'statuses': ['PRESENT', 'PRESENT', 'PRESENT'], 'event_timestamps': ['1970-01-01T00:00:00Z', '1970-01-01T00:00:00Z', '1970-01-01T00:00:00Z']}, {'values': [0, 1, 4], 'statuses': ['PRESENT', 'PRESENT', 'PRESENT'], 'event_timestamps': ['2000-01-01T00:00:00Z', '2000-01-02T00:00:00Z', '2000-01-03T00:00:00Z']}, {'values': [4757, 3615, 9369], 'statuses': ['PRESENT', 'PRESENT', 'PRESENT'], 'event_timestamps': 

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 13.49it/s]


✅ Model loaded from: models:/HousingModel/Production
\n🔮 STEP 3: Making Predictions
✅ Generated 3 predictions
\n💰 STEP 4: Formatting Results
  🏠 MLS 104635: $552,382.04
  🏠 MLS 535721: $164,622.90
  🏠 MLS 900458: $1,250,716.93
\n📦 STEP 5: Complete API Response
📋 API Response Summary:
  ✅ Success: True
  📊 Predictions: 3
  🎯 Model: HousingModel
  🌐 Features: feast_api
🎉 SUCCESS! Complete scoring pipeline working!
📋 Sample API Response JSON:
{'success': True, 'predictions': [{'mls_id': 104635, 'predicted_price': 552382.0359126985, 'formatted_price': '$552,382.04', 'features_source': 'feast_api'}, {'mls_id': 535721, 'predicted_price': 164622.90210299424, 'formatted_price': '$164,622.90', 'features_source': 'feast_api'}, {'mls_id': 900458, 'predicted_price': 1250716.9310476193, 'formatted_price': '$1,250,716.93', 'features_source': 'feast_api'}], 'model_info': {'model_name': 'HousingModel', 'model_stage': 'Production', 'model_uri': 'models:/HousingModel/Production', 'mlflow_tracking_uri': 

In [None]:
# =============================================================================
# STEP 10: PRODUCTION FASTAPI INTEGRATION WITH FEAST API
# =============================================================================

def generate_production_fastapi_code():
    """Generate production FastAPI code that uses Feast API for feature retrieval"""
    
    print("🔧 Production FastAPI + Feast API + MLflow Integration:\\n")
    
    fastapi_code = f'''
# =============================================================================
# Production FastAPI with Feast API Integration
# =============================================================================

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import mlflow.pyfunc
import pandas as pd
import requests
import json
from typing import List
from datetime import datetime
import os
import logging

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(
    title="Housing Price Prediction API",
    description="ML prediction service using Feast features and MLflow models",
    version="2.0.0"
)

# =============================================================================
# CONFIGURATION
# =============================================================================

# MLflow Configuration
MLFLOW_TRACKING_URI = "{MLFLOW_TRACKING_URI}"
MODEL_NAME = "{MODEL_NAME}"
MODEL_STAGE = "Production"

# Feast API Configuration
FEAST_API_URL = os.getenv("FEAST_API_URL", "http://feast-service:8080")
FEAST_TIMEOUT = int(os.getenv("FEAST_TIMEOUT", "10"))

# Feature configuration - matches your Feast feature view
REQUIRED_FEATURES = [
    "housing_features:city",
    "housing_features:state", 
    "housing_features:bedrooms",
    "housing_features:bathrooms",
    "housing_features:area_sqft",
    "housing_features:lot_size",
    "housing_features:year_built",
    "housing_features:days_on_market",
    "housing_features:property_type",
    "housing_features:listing_agent",
    "housing_features:status",
    "housing_features:zipcode_encoded"
]

MODEL_FEATURE_ORDER = [
    'city', 'state', 'bedrooms', 'bathrooms', 'area_sqft', 
    'lot_size', 'year_built', 'days_on_market', 'property_type', 
    'listing_agent', 'status', 'zipcode_encoded'
]

# =============================================================================
# STARTUP - LOAD MODEL
# =============================================================================

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
model_uri = f"models://{{MODEL_NAME}}/{{MODEL_STAGE}}"
model = None

@app.on_event("startup")
async def startup_event():
    global model
    try:
        logger.info(f"Loading model from {{model_uri}}")
        model = mlflow.pyfunc.load_model(model_uri)
        logger.info("✅ Model loaded successfully")
    except Exception as e:
        logger.error(f"❌ Failed to load model: {{e}}")
        raise e

# =============================================================================
# REQUEST/RESPONSE MODELS
# =============================================================================

class PredictionRequest(BaseModel):
    mls_ids: List[int]

class PredictionResult(BaseModel):
    mls_id: int
    predicted_price: float
    formatted_price: str

class PredictionResponse(BaseModel):
    success: bool
    predictions: List[PredictionResult]
    model_info: dict
    feast_info: dict
    metadata: dict
    error: str = None

# =============================================================================
# FEAST API INTEGRATION
# =============================================================================

def get_features_from_feast(mls_ids: List[int]) -> pd.DataFrame:
    \"\"\"Retrieve features from Feast FastAPI service\"\"\"
    
    logger.info(f"Retrieving features for MLS IDs: {{mls_ids}}")
    
    # Prepare Feast API request
    feast_request = {{
        "features": REQUIRED_FEATURES,
        "entities": {{
            "mls_id": mls_ids
        }}
    }}
    
    try:
        # Call Feast API
        response = requests.post(
            f"{{FEAST_API_URL}}/get-online-features",
            json=feast_request,
            headers={{"Content-Type": "application/json"}},
            timeout=FEAST_TIMEOUT
        )
        
        if response.status_code != 200:
            raise HTTPException(
                status_code=502,
                detail=f"Feast API error: {{response.status_code}} - {{response.text}}"
            )
        
        # Parse Feast response
        feast_response = response.json()
        
        # Convert to DataFrame format for model
        features_data = {{}}
        results = feast_response.get('results', [])
        
        for feature_name in REQUIRED_FEATURES:
            clean_name = feature_name.split(':')[1]  # Remove feature_view prefix
            features_data[clean_name] = []
            
            for result in results:
                feature_values = result.get('values', {{}})
                features_data[clean_name].append(feature_values.get(feature_name))
        
        # Create DataFrame with correct feature order
        features_df = pd.DataFrame(features_data)
        features_df = features_df[MODEL_FEATURE_ORDER]  # Ensure correct order
        
        logger.info(f"✅ Retrieved {{len(features_df)}} feature records")
        return features_df
        
    except requests.exceptions.RequestException as e:
        logger.error(f"Feast API connection error: {{e}}")
        raise HTTPException(
            status_code=502, 
            detail=f"Cannot connect to Feast service: {{str(e)}}"
        )
    except Exception as e:
        logger.error(f"Feature retrieval error: {{e}}")
        raise HTTPException(
            status_code=500,
            detail=f"Feature retrieval failed: {{str(e)}}"
        )

# =============================================================================
# PREDICTION ENDPOINTS
# =============================================================================

@app.post("/predict", response_model=PredictionResponse)
async def predict_house_prices(request: PredictionRequest):
    \"\"\"Predict house prices using Feast features and MLflow model\"\"\"
    
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")
    
    try:
        start_time = datetime.now()
        
        # Step 1: Get features from Feast
        features_df = get_features_from_feast(request.mls_ids)
        
        # Step 2: Make predictions
        predictions = model.predict(features_df)
        
        # Step 3: Format results
        results = []
        for mls_id, prediction in zip(request.mls_ids, predictions):
            results.append(PredictionResult(
                mls_id=mls_id,
                predicted_price=float(prediction),
                formatted_price=f"${{prediction:,.2f}}"
            ))
        
        end_time = datetime.now()
        processing_time = (end_time - start_time).total_seconds() * 1000
        
        return PredictionResponse(
            success=True,
            predictions=results,
            model_info={{
                "model_name": MODEL_NAME,
                "model_stage": MODEL_STAGE,
                "model_uri": model_uri,
                "mlflow_tracking_uri": MLFLOW_TRACKING_URI
            }},
            feast_info={{
                "feast_api_url": FEAST_API_URL,
                "feature_count": len(MODEL_FEATURE_ORDER),
                "features_retrieved": len(features_df)
            }},
            metadata={{
                "timestamp": datetime.now().isoformat(),
                "processing_time_ms": processing_time,
                "prediction_count": len(predictions)
            }}
        )
        
    except HTTPException:
        raise  # Re-raise HTTP exceptions
    except Exception as e:
        logger.error(f"Prediction error: {{e}}")
        raise HTTPException(status_code=500, detail=f"Prediction failed: {{str(e)}}")

# =============================================================================
# HEALTH CHECK ENDPOINTS
# =============================================================================

@app.get("/health")
async def health_check():
    \"\"\"Health check for the prediction service\"\"\"
    
    # Check model status
    model_status = model is not None
    
    # Check Feast API status
    feast_status = False
    try:
        response = requests.get(f"{{FEAST_API_URL}}/health", timeout=5)
        feast_status = response.status_code == 200
    except:
        pass
    
    overall_status = model_status and feast_status
    
    return {{
        "status": "healthy" if overall_status else "unhealthy",
        "model_loaded": model_status,
        "feast_api_available": feast_status,
        "feast_api_url": FEAST_API_URL,
        "model_info": {{
            "name": MODEL_NAME,
            "stage": MODEL_STAGE,
            "uri": model_uri
        }} if model_status else None,
        "timestamp": datetime.now().isoformat()
    }}

@app.get("/")
async def root():
    \"\"\"API information\"\"\"
    return {{
        "service": "Housing Price Prediction API",
        "version": "2.0.0",
        "model": MODEL_NAME,
        "endpoints": {{
            "predict": "/predict",
            "health": "/health",
            "docs": "/docs"
        }},
        "integration": {{
            "feast_api": FEAST_API_URL,
            "mlflow": MLFLOW_TRACKING_URI
        }}
    }}

# =============================================================================
# MAIN
# =============================================================================

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8080)
'''
    
    print(fastapi_code)
    
    # Save to file
    with open('production_fastapi_feast_integration.py', 'w') as f:
        f.write(fastapi_code)
    
    print("\\n💾 Production code saved to 'production_fastapi_feast_integration.py'")
    
    # Generate requirements
    requirements = '''fastapi==0.104.1
uvicorn==0.24.0
mlflow==2.8.1
pandas==2.1.4
requests==2.31.0
pydantic==2.5.2
numpy==1.24.3
scikit-learn==1.3.2
'''
    
    with open('fastapi_requirements.txt', 'w') as f:
        f.write(requirements)
    
    print("💾 Requirements saved to 'fastapi_requirements.txt'")
    
    print("\\n🚀 Deployment Instructions:")
    print("1. Update FastAPI container Dockerfile to use production_fastapi_feast_integration.py")
    print("2. Update requirements.txt with fastapi_requirements.txt contents")
    print("3. Set environment variables:")
    print("   - FEAST_API_URL=http://feast-service:8080")
    print("   - FEAST_TIMEOUT=10")
    print("4. Deploy with network connectivity between FastAPI and Feast containers")
    print("5. Test with: POST /predict {\\"mls_ids\\": [104635, 535721]}")
    
    print("\\n🔗 API Endpoints:")
    print("- POST /predict - Make predictions")
    print("- GET /health - Service health check") 
    print("- GET / - API information")
    print("- GET /docs - Interactive API documentation")

# Generate the production integration code
generate_production_fastapi_code()"

In [None]:
# =============================================================================
# STEP 7: FEAST CLIENT SETUP
# =============================================================================

from feast import FeatureStore
import os

# Feast configuration - Update these based on your Feast deployment
FEAST_REPO_PATH = "feast-container-housing-postgres-v2"

# Set environment variables for Feast (these would normally be set in your deployment)
# For local testing, we'll set them programmatically
os.environ.setdefault("FEAST_REGISTRY_PATH", "postgresql://user:password@localhost:5432/feast_registry")
os.environ.setdefault("FEAST_ONLINE_STORE_HOST", "localhost")
os.environ.setdefault("FEAST_ONLINE_STORE_PORT", "5432")
os.environ.setdefault("FEAST_ONLINE_STORE_DATABASE", "feast_online")
os.environ.setdefault("FEAST_ONLINE_STORE_USER", "user")
os.environ.setdefault("FEAST_ONLINE_STORE_PASSWORD", "password")
os.environ.setdefault("FEAST_OFFLINE_STORE_PROJECT_ID", "mlops-intro-461805")
os.environ.setdefault("FEAST_OFFLINE_STORE_DATASET", "feast_housing")

print("🔧 Feast Configuration:")
print(f"  Repository Path: {FEAST_REPO_PATH}")
print(f"  Registry: PostgreSQL")
print(f"  Online Store: PostgreSQL") 
print(f"  Offline Store: BigQuery")

# Initialize Feast client
try:
    store = FeatureStore(repo_path=FEAST_REPO_PATH)
    print("✅ Feast client initialized successfully!")
    
    # List available feature views
    feature_views = store.list_feature_views()
    print(f"\n📊 Available Feature Views:")
    for fv in feature_views:
        print(f"  - {fv.name}: {len(fv.schema)} features")
        
except Exception as e:
    print(f"❌ Feast client initialization failed: {e}")
    print("📝 Note: This is expected if Feast server is not running locally")
    store = None

# Feast Integration and Model Scoring

This section demonstrates online feature retrieval from Feast and model scoring for production inference.