# 🧪 Notebook 7 — Model Deployment & Inference

**Objective:**  
 
Deploy the best model from Notebook 6 (Advanced Experiments) with:

- Reproducible inference on new data

- Risk bands and simple recommendations

- Feature contributions for interpretability

- Deployment-ready pipeline


## 7.1 Load Best Model Pipeline

In [None]:
import os
import joblib
import pandas as pd

# Location of the serialized pipeline exported by the previous notebook.
# NOTE: joblib.load unpickles the file, so only load artifacts you trust.
best_model_path = "models/deployment/best_model_pipeline.pkl"
pipeline_best = joblib.load(best_model_path)
print("✅ Loaded best model pipeline from " + best_model_path)


✅ Loaded best model pipeline from models/deployment/best_model_pipeline.pkl


## 7.2 Helper Functions for Inference

In [None]:
import numpy as np

def get_expected_features(model_pipeline):
    """
    Return the input column names recorded by the pipeline's
    "preprocessor" step as a list.

    Returns None when the pipeline has no "preprocessor" step or when
    that step does not expose ``feature_names_in_``.
    """
    steps = model_pipeline.named_steps
    if "preprocessor" not in steps:
        return None

    step = steps["preprocessor"]
    if not hasattr(step, "feature_names_in_"):
        return None

    return list(step.feature_names_in_)

def align_input(sample_data: pd.DataFrame, expected_features):
    """
    Reshape a new input frame to the column layout given by
    ``expected_features``.

    - Columns listed in ``expected_features`` but absent from the input
      are created and filled with 0.
    - Columns not listed in ``expected_features`` are discarded.
    - The resulting column order follows ``expected_features``.

    A new DataFrame is returned; ``sample_data`` is not modified.
    """
    reindex_options = {"columns": expected_features, "fill_value": 0}
    return sample_data.reindex(**reindex_options)

def predict_pipeline(model_pipeline, new_data: pd.DataFrame):
    """
    Score ``new_data`` with a fitted preprocessing + model pipeline.

    Returns a tuple ``(labels, positive_proba)`` where ``labels`` is the
    output of ``predict`` and ``positive_proba`` is column 1 of the
    ``predict_proba`` output, one entry per input row.
    """
    labels = model_pipeline.predict(new_data)
    class_probabilities = model_pipeline.predict_proba(new_data)
    positive_proba = class_probabilities[:, 1]
    return labels, positive_proba

def predict_from_dict(model_pipeline, patient_dict: dict):
    """
    Score a single patient record supplied as a plain dict.

    The record is wrapped into a one-row DataFrame and, when the
    pipeline exposes its expected input columns, aligned to that schema
    before prediction. Returns the same ``(class, probability)`` pair as
    ``predict_pipeline``.
    """
    record_frame = pd.DataFrame([patient_dict])
    schema = get_expected_features(model_pipeline)
    model_input = record_frame if schema is None else align_input(record_frame, schema)
    return predict_pipeline(model_pipeline, model_input)


## 7.3 Enhanced Inference with Risk Bands & Feature Contributions

In [None]:
def risk_band(prob):
    """Map a probability to a "Low" / "Medium" / "High" risk label.

    Below 0.2 is "Low", below 0.5 is "Medium", everything else is "High".
    """
    # Exclusive upper bounds, checked in ascending order.
    for upper_bound, label in ((0.2, "Low"), (0.5, "Medium")):
        if prob < upper_bound:
            return label
    return "High"

def enhanced_predict(model_pipeline, new_data: pd.DataFrame, top_n=3):
    """
    Enhanced prediction: class, probability, risk band,
    top contributing features, and recommendation.

    Only the first row of ``new_data`` is scored.

    Args:
        model_pipeline: Fitted pipeline exposing ``named_steps``,
            ``predict`` and ``predict_proba``.
        new_data: Input frame; it is aligned to the training schema when
            the pipeline exposes one.
        top_n: Number of highest-magnitude contributions to return.

    Returns:
        dict with keys:
            "Prediction"        -> predicted class as int
            "Probability"       -> positive-class probability in percent,
                                   rounded to 1 decimal
            "Risk Band"         -> "Low" / "Medium" / "High"
            "Recommendation"    -> short text advice derived from the band
            "Top Contributions" -> DataFrame with "Feature" and
                                   "Contribution" columns, sorted by
                                   absolute contribution (empty when no
                                   contributions could be computed)
    """
    steps = model_pipeline.named_steps

    # Align features
    expected_features = get_expected_features(model_pipeline)
    if expected_features is not None:
        new_data = align_input(new_data, expected_features)

    # Predict class and probability
    pred_class = int(model_pipeline.predict(new_data)[0])
    pred_proba = float(model_pipeline.predict_proba(new_data)[:, 1][0])
    pred_proba_pct = round(pred_proba * 100, 1)  # convert to percentage with 1 decimal
    band = risk_band(pred_proba)

    # Recommendation
    recommendation = (
        "Maintain healthy lifestyle" if band == "Low"
        else "Recommend further testing"
    )

    # Feature names as seen by the final estimator
    preprocessor = steps.get("preprocessor")
    if hasattr(preprocessor, "get_feature_names_out"):
        feature_names = list(preprocessor.get_feature_names_out())
    else:
        feature_names = [f"f{i}" for i in range(new_data.shape[1])]

    contributions = None
    # Logistic Regression → transformed input × coefficients
    if "log_reg" in steps:
        model = steps["log_reg"]
        if preprocessor is not None:
            X_scaled = preprocessor.transform(new_data)
        else:
            # No preprocessing step: the model consumes the aligned frame directly.
            X_scaled = new_data.to_numpy()
        if hasattr(X_scaled, "toarray"):  # sparse output from the transformer
            X_scaled = X_scaled.toarray()
        contributions = X_scaled[0] * model.coef_[0]

    # Tree-based models → approximate with feature importances
    elif any(k in steps for k in ("rf", "xgb", "lgbm")):
        model = list(steps.values())[-1]
        importances = model.feature_importances_
        contributions = importances * pred_proba  # rough approximation

    # Build top contributions dataframe
    if contributions is not None and len(contributions) == len(feature_names):
        # Keep the default positional index here: reindexing the rows by
        # feature name would match no existing label and turn every row
        # into NaN.
        contrib_df = pd.DataFrame({
            "Feature": feature_names,
            "Contribution": contributions,
        })
        # Stable sort so ties keep their original feature order.
        order = (
            contrib_df["Contribution"]
            .abs()
            .sort_values(ascending=False, kind="mergesort")
            .index
        )
        top_contrib = contrib_df.loc[order].head(top_n).reset_index(drop=True)
    else:
        top_contrib = pd.DataFrame(columns=["Feature", "Contribution"])

    return {
        "Prediction": pred_class,
        "Probability": pred_proba_pct,  # now in %
        "Risk Band": band,
        "Recommendation": recommendation,
        "Top Contributions": top_contrib
    }


## 7.4 Test Enhanced Inference with Sample Patient

In [None]:
# Example patient record used to smoke-test the enhanced inference path
sample_patient = dict(
    age=55,
    sex=1,
    cp=3,
    trestbps=240,
    chol=220,
    fbs=0,
    restecg=1,
    thalch=150,
    exang=0,
    oldpeak=1.5,
)

sample_df = pd.DataFrame([sample_patient])
result = enhanced_predict(pipeline_best, sample_df)

# Print the scalar fields of the result, one per line
print("🔹 Best Model Enhanced Prediction")
for label in ("Prediction", "Probability", "Risk Band", "Recommendation"):
    print(f"{label}:", result[label])


# Second example: a record built from values the author considers high-risk
high_risk_patient = {
    "age": 68,
    "sex": 1,          # male
    "cp": 4,           # typical angina
    "trestbps": 180,   # high resting blood pressure
    "chol": 300,       # high cholesterol
    "fbs": 1,          # fasting blood sugar > 120 mg/dl
    "restecg": 2,      # abnormal ECG
    "thalch": 120,     # low max heart rate achieved
    "exang": 1,        # exercise-induced angina
    "oldpeak": 3.0,    # ST depression
}

high_risk_df = pd.DataFrame([high_risk_patient])
result_high = enhanced_predict(pipeline_best, high_risk_df)

# Report every field, including the top feature contributions table
print("🔹 High-Risk Patient Prediction")
print(f"Prediction: {result_high['Prediction']}")
print(f"Probability: {result_high['Probability']} %")
print(f"Risk Band: {result_high['Risk Band']}")
print(f"Recommendation: {result_high['Recommendation']}")
print(f"Top Contributions:\n {result_high['Top Contributions']}")


🔹 Best Model Enhanced Prediction
Prediction: 0
Probability: 44.7
Risk Band: Medium
Recommendation: Recommend further testing
🔹 High-Risk Patient Prediction
Prediction: 1
Probability: 51.4 %
Risk Band: High
Recommendation: Recommend further testing
Top Contributions:
               Feature  Contribution
num__id           NaN           NaN
num__age          NaN           NaN
num__trestbps     NaN           NaN


## 7.5 Save Deployment Pipeline (optional re-save to the same path)

In [None]:
# Write the in-memory pipeline to the deployment folder, creating the
# folder first if it does not exist yet.
deployment_dir = "models/deployment"
deployment_file = "models/deployment/best_model_pipeline.pkl"
os.makedirs(deployment_dir, exist_ok=True)
joblib.dump(pipeline_best, deployment_file)
print("✅ Best model pipeline saved for deployment.")
