In [0]:
# Install required packages for real-time inference
%pip install databricks-sdk --quiet
%pip install databricks-feature-engineering --quiet
%pip install requests --quiet

dbutils.library.restartPython()

[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m
[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m
[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


In [0]:
import os, json, logging, pandas as pd

import sys
sys.path.append(os.path.abspath('../'))

from utils.common_utils import load_config, setup_logging
from utils.data_loader import load_data_from_source

config = load_config('../config/config.yaml')
setup_logging(config)
endpoint_name = config['deployment']['endpoint_name']
model_name = config['mlflow']['databricks']['registered_model_name']
print('Endpoint:', endpoint_name)
print('Model Name:', model_name)

Endpoint: next-best-product-endpoint
Model Name: datafabric_catalog.customer_hc_silver.next_best_product_model


## 1. Prepare Sample Data

In [0]:
# Load customer features from Feature Store or Unity Catalog
if config['data_source']['type'] == 'unity_catalog':
    from utils.common_utils import get_spark_session
    spark = get_spark_session(config)
    
    uc_config = config['data_source']['unity_catalog']
    catalog = uc_config['catalog']
    output_schema = uc_config['output_schema']
    
    feature_store_table = f"{catalog}.{output_schema}.customer_features_fs"
    features_table = f"{catalog}.{output_schema}.customer_features"
    
    print(f"📊 Loading features from Feature Store: {feature_store_table}")
    
    # Try to load from Feature Store first
    try:
        from databricks.feature_engineering import FeatureEngineeringClient
        
        fe = FeatureEngineeringClient()
        features_spark_df = fe.read_table(name=feature_store_table)
        features_df = features_spark_df.toPandas()
        print(f"✅ Loaded features from Feature Store")
        
    except ImportError:
        print(f"⚠️ Feature Engineering client not available, using direct table read")
        features_df = spark.table(features_table).toPandas()
        print(f"✅ Loaded features from Unity Catalog table")
    except Exception as e:
        print(f"⚠️ Error reading from Feature Store: {str(e)}")
        features_df = spark.table(features_table).toPandas()
        print(f"✅ Loaded features from Unity Catalog table")
else:
    # CSV mode
    features_path = os.path.abspath('../data/processed/customer_features.csv')
    print(f"📂 Loading features from: {features_path}")
    features_df = pd.read_csv(features_path)

print(f"✅ Loaded features for {len(features_df)} customers")

# Select a sample customer for testing
sample = features_df.sample(1, random_state=42)
cust_id = int(sample['CUSTOMERID'].iloc[0])

# Prepare features (drop ID columns)
X_raw = sample.drop(['CUSTOMERID', 'PARTYID'], axis=1, errors='ignore')

# Check for categorical columns
cat_cols = X_raw.select_dtypes(include=['object', 'category']).columns.tolist()
if cat_cols:
    print(f"\n⚠️ Found {len(cat_cols)} categorical columns: {cat_cols}")
    print(f"   These will be sent as-is to the endpoint")

print(f"\n👤 Sample Customer ID: {cust_id}")
print(f"📊 Features: {len(X_raw.columns)} columns")
print(f"\nFeature columns: {X_raw.columns.tolist()[:10]}...")

📊 Loading features from Feature Store: datafabric_catalog.ml_outputs.customer_features_fs
✅ Loaded features from Feature Store
✅ Loaded features for 1000 customers

⚠️ Found 4 categorical columns: ['AGE_GROUP', 'TENURE_GROUP', 'MIN_INTEREST_RATE', 'MAX_INTEREST_RATE']
   These will be sent as-is to the endpoint

👤 Sample Customer ID: 1522
📊 Features: 41 columns

Feature columns: ['AGE', 'CUSTOMER_TENURE_DAYS', 'AGE_GROUP', 'TENURE_GROUP', 'TOTAL_ACCOUNTS', 'ACTIVE_ACCOUNTS', 'TOTAL_PRINCIPAL', 'AVG_PRINCIPAL', 'MAX_PRINCIPAL', 'AVG_INTEREST_RATE']...


## 2. Build Payload (dataframe_split)

In [0]:
from decimal import Decimal
import pandas as pd
import numpy as np

# Convert Decimal types to float for JSON serialization
X_processed = X_raw.copy()
for col in X_processed.columns:
    if X_processed[col].dtype == 'object':
        # Check if the column contains Decimal objects
        if len(X_processed[col]) > 0 and isinstance(X_processed[col].iloc[0], Decimal):
            X_processed[col] = X_processed[col].astype(float)

print(f"📊 Original features: {len(X_processed.columns)} columns")
print(f"Columns: {X_processed.columns.tolist()}")

# Apply one-hot encoding ONLY to AGE_GROUP and TENURE_GROUP
# Keep MIN_INTEREST_RATE and MAX_INTEREST_RATE as numeric (matching the NEW model)
cat_cols_to_encode = ['AGE_GROUP', 'TENURE_GROUP']
existing_cat_cols = [col for col in cat_cols_to_encode if col in X_processed.columns]

if existing_cat_cols:
    print(f"\n🔄 Applying one-hot encoding to: {existing_cat_cols}")
    print(f"✅ Keeping MIN_INTEREST_RATE and MAX_INTEREST_RATE as numeric (not one-hot encoded)")
    
    # Apply one-hot encoding with drop_first=True to match training
    X_processed = pd.get_dummies(X_processed, columns=existing_cat_cols, drop_first=True, dtype=int)
    
    print(f"✅ One-hot encoding complete: {len(X_processed.columns)} columns")

# Add missing one-hot encoded columns with 0 values (for categories not in this sample)
expected_age_groups = ['AGE_GROUP_26-35', 'AGE_GROUP_36-45', 'AGE_GROUP_46-55', 'AGE_GROUP_56-65', 'AGE_GROUP_65+']
expected_tenure_groups = ['TENURE_GROUP_1-2Y', 'TENURE_GROUP_6M-1Y']

for col in expected_age_groups + expected_tenure_groups:
    if col not in X_processed.columns:
        X_processed[col] = 0

print(f"\n📊 Final feature count: {len(X_processed.columns)} columns")
print(f"Sample features: {X_processed.columns.tolist()[:15]}...")

# Convert to JSON-serializable format
records = json.loads(X_processed.to_json(orient='records'))

# Build payload
payload_to_send = {'inputs': records}

print("\n📦 Payload Preview:")
print("=" * 80)
print(json.dumps(payload_to_send, indent=2)[:800])
print("...")
print("=" * 80)
print(f"\n📊 Payload size: {len(json.dumps(payload_to_send))} bytes")
print(f"📄 Number of records: {len(payload_to_send['inputs'])}")
print(f"📊 Features per record: {len(payload_to_send['inputs'][0]) if payload_to_send['inputs'] else 0}")

print("\n✅ Payload ready for NEW model (46 features with numeric interest rates)")

📊 Original features: 41 columns
Columns: ['AGE', 'CUSTOMER_TENURE_DAYS', 'AGE_GROUP', 'TENURE_GROUP', 'TOTAL_ACCOUNTS', 'ACTIVE_ACCOUNTS', 'TOTAL_PRINCIPAL', 'AVG_PRINCIPAL', 'MAX_PRINCIPAL', 'AVG_INTEREST_RATE', 'MIN_INTEREST_RATE', 'MAX_INTEREST_RATE', 'AVG_ACCOUNT_AGE_DAYS', 'MIN_ACCOUNT_AGE_DAYS', 'MAX_ACCOUNT_AGE_DAYS', 'NUM_UNIQUE_PRODUCTS', 'NUM_UNIQUE_CHANNELS', 'INACTIVE_ACCOUNTS', 'ACTIVE_ACCOUNT_RATIO', 'TXN_COUNT_SHORT_TERM', 'TXN_AMOUNT_SUM_SHORT_TERM', 'TXN_AMOUNT_MEAN_SHORT_TERM', 'TXN_AMOUNT_MEDIAN_SHORT_TERM', 'TXN_AMOUNT_STD_SHORT_TERM', 'TXN_AMOUNT_MIN_SHORT_TERM', 'TXN_AMOUNT_MAX_SHORT_TERM', 'TXN_COUNT_MEDIUM_TERM', 'TXN_AMOUNT_SUM_MEDIUM_TERM', 'TXN_AMOUNT_MEAN_MEDIUM_TERM', 'TXN_AMOUNT_MEDIAN_MEDIUM_TERM', 'TXN_AMOUNT_STD_MEDIUM_TERM', 'TXN_AMOUNT_MIN_MEDIUM_TERM', 'TXN_AMOUNT_MAX_MEDIUM_TERM', 'TXN_COUNT_LONG_TERM', 'TXN_AMOUNT_SUM_LONG_TERM', 'TXN_AMOUNT_MEAN_LONG_TERM', 'TXN_AMOUNT_MEDIAN_LONG_TERM', 'TXN_AMOUNT_STD_LONG_TERM', 'TXN_AMOUNT_MIN_LONG_TERM', 'TXN

## 3. Invoke Databricks Endpoint

In [0]:
# Invoke the serving endpoint
import requests
import time
from databricks.sdk import WorkspaceClient

print(f"\n🚀 Invoking endpoint: {endpoint_name}")
print(f"👤 Customer ID: {cust_id}")

try:
    # Get workspace client for host information
    w = WorkspaceClient()
    
    # Get host (remove https:// if present)
    host = w.config.host
    if host.startswith('https://'):
        host = host.replace('https://', '')
    
    # Get endpoint URL
    endpoint_url = f"https://{host}/serving-endpoints/{endpoint_name}/invocations"
    
    # Get authentication token from notebook context (works inside Databricks)
    try:
        token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get()
    except Exception:
        # Fallback to environment variable if not in notebook context
        token = os.environ.get('DATABRICKS_TOKEN')
        if not token:
            raise ValueError("No authentication token available. Set DATABRICKS_TOKEN environment variable.")
    
    print(f"\n🔗 Endpoint URL: {endpoint_url}")
    
    # Try alternative payload format (inputs)
    payload_to_send = {
        'inputs': X_processed.to_dict(orient='records')
    }
    
    print(f"\n📤 Sending request with 'inputs' format...")
    print(f"Payload preview: {str(payload_to_send)[:200]}...")
    
    # Set headers
    headers = {
        'Authorization': f'Bearer {token}',
        'Content-Type': 'application/json'
    }
    
    # Send request
    start_time = time.time()
    resp = requests.post(endpoint_url, json=payload_to_send, headers=headers, timeout=60)
    elapsed_time = time.time() - start_time
    
    print(f"\n⏱️ Response time: {elapsed_time:.2f} seconds")
    print(f"📊 Status code: {resp.status_code}")
    
    if resp.status_code == 200:
        print(f"\n✅ Request successful!")
        response_data = resp.json()
    else:
        print(f"\n❌ Request failed!")
        print(f"\nFull Response:")
        print(resp.text)  # Print full error message
        response_data = None
        
except Exception as e:
    print(f"\n❌ Error invoking endpoint: {str(e)}")
    response_data = None
    raise


🚀 Invoking endpoint: next-best-product-endpoint
👤 Customer ID: 1522

🔗 Endpoint URL: https://adb-1364099644588382.2.azuredatabricks.net/serving-endpoints/next-best-product-endpoint/invocations

📤 Sending request with 'inputs' format...
Payload preview: {'inputs': [{'AGE': 46, 'CUSTOMER_TENURE_DAYS': 170, 'TOTAL_ACCOUNTS': 1.0, 'ACTIVE_ACCOUNTS': 0.0, 'TOTAL_PRINCIPAL': 825000.0, 'AVG_PRINCIPAL': 825000.0, 'MAX_PRINCIPAL': 825000.0, 'AVG_INTEREST_RAT...

⏱️ Response time: 0.14 seconds
📊 Status code: 200

✅ Request successful!


## 4. Response Handling

In [0]:
# Parse and display predictions
if response_data:
    print("\n" + "=" * 80)
    print("PREDICTION RESULTS")
    print("=" * 80)
    
    print(f"\n👤 Customer ID: {cust_id}")
    
    # The response format depends on the model output
    # Typically it's {'predictions': [...]}
    if 'predictions' in response_data:
        predictions = response_data['predictions']
        
        if isinstance(predictions, list) and len(predictions) > 0:
            pred = predictions[0]
            
            # If prediction is a list (multi-class probabilities)
            if isinstance(pred, list):
                print(f"\n🎯 Top 5 Product Recommendations:")
                print("\n{:<6} {:<15} {:<12}".format("Rank", "Product ID", "Probability"))
                print("-" * 40)
                
                # Get top 5 predictions
                top_5_indices = sorted(range(len(pred)), key=lambda i: pred[i], reverse=True)[:5]
                
                for rank, idx in enumerate(top_5_indices, 1):
                    prob = pred[idx]
                    print("{:<6} {:<15} {:<12.4f}".format(rank, idx, prob))
            
            # If prediction is a single value
            else:
                print(f"\n🎯 Predicted Product ID: {pred}")
        
        else:
            print(f"\n📊 Raw predictions: {predictions}")
    
    else:
        print(f"\n📊 Full response:")
        print(json.dumps(response_data, indent=2)[:1000])
    
    print("\n" + "=" * 80)
    print("✅ Real-time inference completed successfully!")
    print("=" * 80)
    
else:
    print("\n❌ No response data available. Check the error above.")


PREDICTION RESULTS

👤 Customer ID: 1522

🎯 Predicted Product ID: 6

✅ Real-time inference completed successfully!


## 5. Best Practices

- Promote model to Production stage before creating endpoint.
- Use staged rollout (shadow traffic) for new versions.
- Monitor latency, error rate, drift.
- Log request IDs for traceability.
- Add input validation & authentication.