In [1]:
"""
Hospital Readmission Prediction - Inference & Analysis
=======================================================
Features:
1. Extract feature importance from all models
2. Predict for single patient
3. Predict for batch of patients
4. Complete metrics table for all models
5. Risk stratification and recommendations
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import json
import warnings
warnings.filterwarnings('ignore')
from google.colab import drive

drive.mount('/content/drive')
# Check that the mount worked
!ls /content/drive/MyDrive

# TensorFlow for neural networks
import tensorflow as tf
from tensorflow import keras

from sklearn.metrics import (
    roc_auc_score, accuracy_score, precision_score, recall_score,
    f1_score, brier_score_loss, confusion_matrix, classification_report
)

print("="*80)
print("HOSPITAL READMISSION PREDICTION - INFERENCE & ANALYSIS")
print("="*80)


Mounted at /content/drive
'1708094542 BHARG.pdf'
 2012-05-09catalystdallas-120509090645-phpapp02.pdf
 2985fdf99edf9d4dc42251eb1a26277d.jpg
 2Print
 30-day-ebook1.pdf
'367 Avenida Manzanos.pdf'
'3 BHK Duplex independent house.gdoc'
 470-G1324.pdf
 574062db9f183d363bcb1d9f03e69fc1.jpg
 982763fcbd9dd911c0ea6e0b44618869d07b.pdf
 a470c4a117c6dad31a08568e310ad4a5.jpg
 Accenture-DevOps-brochure-new.pdf
 AI98036FU.pdf
'AMWAY Report .pdf'
'Attraction Course'
 AWS_certified_devops_engineer_professional_blueprint.pdf
 Backpropagation-Algorithm-An-Artificial-Neural-Network-Approach-for-Pattern-Recognition.pdf
 backup-oldlaptop
'Become A Fan [Form].gform'
'Become A Fan.gsheet'
 BerkeleyML
 bettersearchenginetesting-110324134324-phpapp02.pdf
 BizDocs
 Books
 Building-a-Brand-eBook-Section-1.pdf
 Business_India.gslides
'Business Tracking'
"Changing Bits_ Testing Lucene's index durability after crash or power loss.pdf"
'Christmas Boutique at O’Connor Hospital.gdoc'
'Colab Notebooks'
 conet.gsheet
 Con

In [4]:

# ============================================================================
# 1. LOAD ALL MODELS & CONFIGURATION
# ============================================================================
from pathlib import Path

print("\n[1/5] Loading models and configuration...")
# Define base paths
BASE_DIR = Path('/content/drive/MyDrive/BerkeleyML/CapstoneHospitalReadmission') # Current directory
MODELS_DIR = BASE_DIR / 'modelsf'
REPORTS_DIR = BASE_DIR / 'reportsf'
DATA_DIR = BASE_DIR / 'eda' # Where processed_data.csv is located

# Create directories if they don't exist
MODELS_DIR.mkdir(exist_ok=True)
REPORTS_DIR.mkdir(exist_ok=True)

# Define specific file paths
PATHS = {
    # Configuration files
    'tree_config': MODELS_DIR / 'tree_models_config.json',
    'ensemble_config': MODELS_DIR / 'final_ensemble_config.json',

    # Scaler
    'scaler': MODELS_DIR / 'scaler_final.pkl',

    # Tree-based models
    'xgboost': MODELS_DIR / 'xgboost_calibrated_final.pkl',
    'lightgbm': MODELS_DIR / 'lightgbm_calibrated_final.pkl',
    'random_forest': MODELS_DIR / 'random_forest_calibrated_final.pkl',
    'gradient_boosting': MODELS_DIR / 'gradient_boosting_calibrated_final.pkl',
    'stacking': MODELS_DIR / 'stacking_ensemble_final.pkl',
    'voting': MODELS_DIR / 'voting_ensemble_final.pkl',

    # Neural network models
    'deep_nn': MODELS_DIR / 'deep_nn_512_256_128_64.h5',
    'residual_nn': MODELS_DIR / 'residual_nn.h5',
    'attention_nn': MODELS_DIR / 'attention_nn.h5',

    # Predictions
    'tree_predictions': MODELS_DIR / 'tree_model_predictions.npz',
    'nn_predictions': MODELS_DIR / 'final_predictions.npz',
    'test_indices': MODELS_DIR / 'test_indices.npy',

    # Data
    'processed_data': DATA_DIR / 'processed_data.csv',

    # Output files
    'feature_importance': REPORTS_DIR / 'feature_importance_aggregated.csv',
    'metrics_table': REPORTS_DIR / 'all_models_metrics_complete.csv',
    'batch_predictions': REPORTS_DIR / 'example_batch_predictions.csv',
    'feature_plot': 'top_features_importance.png'
}




[1/5] Loading models and configuration...


In [11]:
# Load configuration
with open(PATHS['tree_config'], 'r') as f:
    config = json.load(f)

with open(PATHS['ensemble_config'], 'r') as f:
    ensemble_config = json.load(f)

# Load scaler
scaler = joblib.load(PATHS['scaler'])

# Load tree-based models
models = {
    'XGBoost': joblib.load(PATHS['xgboost']),
    'LightGBM': joblib.load(PATHS['lightgbm']),
    'Random Forest': joblib.load(PATHS['random_forest']),
    'Gradient Boosting': joblib.load(PATHS['gradient_boosting']),
    'Stacking Ensemble': joblib.load(PATHS['stacking']),
    'Voting Ensemble': joblib.load(PATHS['voting'])
}

# Load neural networks
nn_models = {
    'Deep NN': keras.models.load_model(PATHS['deep_nn']),
    'Residual NN': keras.models.load_model(PATHS['residual_nn']),
    'Attention NN': keras.models.load_model(PATHS['attention_nn'])
}

# Correcting the model name in ensemble_config
ensemble_config['model_names'] = [
    'XGBoost', 'LightGBM', 'Random Forest', 'Gradient Boosting',
    'Stacking Ensemble', 'Voting Ensemble', 'Deep NN', 'Residual NN',
    'Attention NN'
]

# Load feature names
df = pd.read_csv(PATHS['processed_data'])
feature_names = [col for col in df.columns if col != 'readmitted_binary']

print(f"✓ Loaded {len(models)} tree models + {len(nn_models)} neural networks")
print(f"✓ Features: {len(feature_names)}")



✓ Loaded 6 tree models + 3 neural networks
✓ Features: 50


In [6]:
# ============================================================================
# 2. EXTRACT FEATURE IMPORTANCE FROM ALL MODELS
# ============================================================================

print("\n[2/5] Extracting feature importance...")

feature_importance_dict = {}

# XGBoost feature importance
xgb_model = models['XGBoost'].calibrated_classifiers_[0].estimator
feature_importance_dict['XGBoost'] = pd.DataFrame({
    'feature': feature_names,
    'importance': xgb_model.feature_importances_
}).sort_values('importance', ascending=False)

# LightGBM feature importance
lgbm_model = models['LightGBM'].calibrated_classifiers_[0].estimator
feature_importance_dict['LightGBM'] = pd.DataFrame({
    'feature': feature_names,
    'importance': lgbm_model.feature_importances_
}).sort_values('importance', ascending=False)

# Random Forest feature importance
rf_model = models['Random Forest'].calibrated_classifiers_[0].estimator
feature_importance_dict['Random Forest'] = pd.DataFrame({
    'feature': feature_names,
    'importance': rf_model.feature_importances_
}).sort_values('importance', ascending=False)

# Gradient Boosting feature importance
gb_model = models['Gradient Boosting'].calibrated_classifiers_[0].estimator
feature_importance_dict['Gradient Boosting'] = pd.DataFrame({
    'feature': feature_names,
    'importance': gb_model.feature_importances_
}).sort_values('importance', ascending=False)

# Aggregate feature importance (average across all tree models)
all_importances = []
for model_name, df_imp in feature_importance_dict.items():
    df_imp_copy = df_imp.copy()
    df_imp_copy['model'] = model_name
    all_importances.append(df_imp_copy)

combined_importance = pd.concat(all_importances)
aggregate_importance = combined_importance.groupby('feature')['importance'].mean().reset_index()
aggregate_importance = aggregate_importance.sort_values('importance', ascending=False)

print("\n📊 TOP 20 MOST IMPORTANT FEATURES (Averaged across all tree models):")
print("="*80)
print(aggregate_importance.head(20).to_string(index=False))

# Visualize top 15 features
plt.figure(figsize=(12, 8))
top_features = aggregate_importance.head(15)
plt.barh(range(len(top_features)), top_features['importance'], color='#3498db')
plt.yticks(range(len(top_features)), top_features['feature'])
plt.xlabel('Average Importance Score', fontsize=12)
plt.ylabel('Feature', fontsize=12)
plt.title('Top 15 Most Important Features (Average across Tree Models)',
         fontsize=14, fontweight='bold')
plt.gca().invert_yaxis()
plt.tight_layout()
plt.savefig('top_features_importance.png', dpi=300, bbox_inches='tight')
plt.close()

print("\n✓ Feature importance visualization saved: top_features_importance.png")

# Save feature importance to CSV
aggregate_importance.to_csv(PATHS['feature_importance'], index=False)
print("✓ Feature importance saved: reports/feature_importance_aggregated.csv")




[2/5] Extracting feature importance...

📊 TOP 20 MOST IMPORTANT FEATURES (Averaged across all tree models):
                 feature  importance
         num_medications 1741.774503
        time_in_hospital 1133.813516
                     age  894.790757
        number_diagnoses  855.566561
          num_procedures  784.553872
discharge_disposition_id  548.382648
       admission_type_id  524.780493
      num_lab_procedures  455.517226
        number_inpatient  425.555332
       total_utilization  412.279333
       number_outpatient  276.270506
     admission_source_id  275.013748
                    race  259.262392
            labs_per_day  207.513602
      procedures_per_day  196.518292
               A1Cresult  180.260157
        number_emergency  174.018881
             age_numeric  173.303570
                  gender  160.258674
          med_complexity   67.505471

✓ Feature importance visualization saved: top_features_importance.png
✓ Feature importance saved: reports/feature

In [8]:
# ============================================================================
# 3. COMPLETE METRICS TABLE FOR ALL MODELS
# ============================================================================

print("\n[3/5] Generating comprehensive metrics table...")

# Load test data for evaluation
test_indices = np.load(PATHS['test_indices'])
X_test = df.loc[test_indices, feature_names]
y_test = df.loc[test_indices, 'readmitted_binary']
X_test_scaled = scaler.transform(X_test)

# Load all predictions
tree_preds = np.load(PATHS['tree_predictions'])
nn_preds = np.load(PATHS['nn_predictions'])

# Create comprehensive metrics table
metrics_list = []

def calculate_all_metrics(name, y_true, y_pred, y_proba):
    """Calculate comprehensive metrics for a model"""
    cm = confusion_matrix(y_true, y_pred)
    tn, fp, fn, tp = cm.ravel()

    return {
        'Model': name,
        'AUC-ROC': roc_auc_score(y_true, y_proba),
        'Accuracy': accuracy_score(y_true, y_pred),
        'Precision': precision_score(y_true, y_pred, zero_division=0),
        'Recall': recall_score(y_true, y_pred, zero_division=0),
        'F1-Score': f1_score(y_true, y_pred, zero_division=0),
        'Specificity': tn / (tn + fp) if (tn + fp) > 0 else 0,
        'NPV': tn / (tn + fn) if (tn + fn) > 0 else 0,
        'Brier Score': brier_score_loss(y_true, y_proba),
        'True Positives': int(tp),
        'True Negatives': int(tn),
        'False Positives': int(fp),
        'False Negatives': int(fn)
    }

# Helper function to find optimal threshold
def find_optimal_threshold(y_true, y_proba):
    from sklearn.metrics import confusion_matrix
    COST_FP = 500
    COST_FN = 17500
    thresholds = np.linspace(0.1, 0.9, 100)
    costs = []
    for t in thresholds:
        y_pred = (y_proba >= t).astype(int)
        tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
        cost = fp * COST_FP + fn * COST_FN
        costs.append(cost)
    return thresholds[np.argmin(costs)]

# Evaluate tree models
for model_name in ['xgboost', 'lightgbm', 'random_forest', 'gradient_boosting', 'stacking', 'voting']:
    display_name = model_name.replace('_', ' ').title()
    y_proba = tree_preds[model_name]
    optimal_thresh = find_optimal_threshold(y_test, y_proba)
    y_pred = (y_proba >= optimal_thresh).astype(int)
    metrics_list.append(calculate_all_metrics(display_name, y_test, y_pred, y_proba))

# Evaluate neural networks
for model_name in ['deep_nn', 'residual_nn', 'attention_nn']:
    display_name = model_name.replace('_', ' ').title()
    y_proba = nn_preds[model_name]
    optimal_thresh = find_optimal_threshold(y_test, y_proba)
    y_pred = (y_proba >= optimal_thresh).astype(int)
    metrics_list.append(calculate_all_metrics(display_name, y_test, y_pred, y_proba))

# Evaluate final ensemble
y_proba_ensemble = nn_preds['final_ensemble']
optimal_thresh_ensemble = find_optimal_threshold(y_test, y_proba_ensemble)
y_pred_ensemble = (y_proba_ensemble >= optimal_thresh_ensemble).astype(int)
metrics_list.append(calculate_all_metrics('Final Ensemble (Softmax)', y_test, y_pred_ensemble, y_proba_ensemble))

# Create DataFrame
metrics_df = pd.DataFrame(metrics_list)
metrics_df = metrics_df.sort_values('AUC-ROC', ascending=False).reset_index(drop=True)

print("\n" + "="*80)
print("COMPREHENSIVE METRICS TABLE - ALL MODELS")
print("="*80)
print(metrics_df.to_string(index=False))

# Save to CSV
metrics_df.to_csv(PATHS['metrics_table'], index=False)
print("\n✓ Metrics table saved: reports/all_models_metrics_complete.csv")

# Create a prettier display version
print("\n" + "="*80)
print("SIMPLIFIED METRICS TABLE (Main Metrics)")
print("="*80)
display_cols = ['Model', 'AUC-ROC', 'Accuracy', 'Precision', 'Recall', 'F1-Score', 'Brier Score']
print(metrics_df[display_cols].to_string(index=False))





[3/5] Generating comprehensive metrics table...

COMPREHENSIVE METRICS TABLE - ALL MODELS
                   Model  AUC-ROC  Accuracy  Precision   Recall  F1-Score  Specificity      NPV  Brier Score  True Positives  True Negatives  False Positives  False Negatives
Final Ensemble (Softmax) 0.662412  0.123514   0.112731 0.997798  0.202575     0.013715 0.980237     0.152241            2266             248            17835                5
             Residual Nn 0.657871  0.118208   0.112243 0.999119  0.201814     0.007576 0.985612     0.554691            2269             137            17946                2
                 Deep Nn 0.653227  0.122679   0.112674 0.998239  0.202492     0.012719 0.982906     0.550997            2267             230            17853                4
            Attention Nn 0.652949  0.124251   0.112815 0.997798  0.202711     0.014544 0.981343     0.553513            2266             263            17820                5
                Lightgbm 0.639182 

In [9]:
# ============================================================================
# 4. SINGLE PATIENT PREDICTION FUNCTION
# ============================================================================

print("\n[4/5] Creating prediction functions...")

def predict_single_patient(patient_data, return_all_models=False):
    """
    Predict readmission risk for a single patient

    Parameters:
    -----------
    patient_data : dict or pd.DataFrame
        Patient features (must match training features)
    return_all_models : bool
        If True, return predictions from all models
        If False, return only final ensemble prediction

    Returns:
    --------
    dict with prediction results
    """
    # Convert to DataFrame if dict
    if isinstance(patient_data, dict):
        patient_df = pd.DataFrame([patient_data])
    else:
        patient_df = patient_data.copy()

    # Ensure correct feature order
    patient_df = patient_df[feature_names]

    # Scale features
    patient_scaled = scaler.transform(patient_df)

    # Get predictions from all models
    predictions = {}

    # Tree models
    for name, model in models.items():
        predictions[name] = model.predict_proba(patient_scaled)[0, 1]

    # Neural networks
    for name, model in nn_models.items():
        predictions[name] = model.predict(patient_scaled, verbose=0)[0, 0]

    # Final ensemble (softmax weighted)
    softmax_weights = np.array(ensemble_config['softmax_weights'])
    model_preds = np.array([predictions[name] for name in ensemble_config['model_names']])
    ensemble_prob = np.average(model_preds, weights=softmax_weights)

    # Risk stratification
    if ensemble_prob >= 0.60:
        risk_level = "HIGH"
        color = "🔴"
        action = "Intensive case management required"
        interventions = [
            "Assign dedicated care coordinator",
            "Post-discharge phone call within 24 hours",
            "Home health visit within 48-72 hours",
            "Pharmacist medication reconciliation",
            "Schedule 7-day follow-up appointment (not 30-day)",
            "Daily monitoring for first 2 weeks"
        ]
    elif ensemble_prob >= 0.35:
        risk_level = "MEDIUM"
        color = "🟡"
        action = "Enhanced discharge planning"
        interventions = [
            "Enhanced discharge instructions (written + verbal)",
            "Phone call within 7 days",
            "Schedule 14-day follow-up appointment",
            "Medication list review",
            "Patient portal messaging",
            "Community resource referrals"
        ]
    else:
        risk_level = "LOW"
        color = "🟢"
        action = "Standard care protocol"
        interventions = [
            "Standard discharge instructions",
            "30-day routine follow-up",
            "Patient portal access",
            "As-needed support"
        ]

    result = {
        'readmission_probability': ensemble_prob,
        'risk_level': risk_level,
        'recommended_action': action,
        'interventions': interventions,
        'cost_avoidance': 17500 if risk_level in ['HIGH', 'MEDIUM'] else 0,
        'intervention_cost': 600 if risk_level == 'HIGH' else (200 if risk_level == 'MEDIUM' else 0)
    }

    if return_all_models:
        result['individual_model_predictions'] = predictions

    # Print formatted output
    print(f"\n{color} PREDICTION RESULT {color}")
    print("="*60)
    print(f"Readmission Probability: {ensemble_prob:.1%}")
    print(f"Risk Level: {risk_level}")
    print(f"Recommended Action: {action}")
    print(f"\nSuggested Interventions:")
    for i, intervention in enumerate(interventions, 1):
        print(f"  {i}. {intervention}")
    print(f"\nFinancial Impact:")
    print(f"  Intervention Cost: ${result['intervention_cost']:,}")
    print(f"  Potential Cost Avoided: ${result['cost_avoidance']:,}")
    print(f"  Net Benefit: ${result['cost_avoidance'] - result['intervention_cost']:,}")

    if return_all_models:
        print(f"\nIndividual Model Predictions:")
        for model_name, prob in predictions.items():
            print(f"  {model_name:25s} {prob:.3f} ({prob*100:.1f}%)")

    return result

def predict_batch_patients(patients_df):
    """
    Predict readmission risk for multiple patients

    Parameters:
    -----------
    patients_df : pd.DataFrame
        DataFrame with patient features (must match training features)

    Returns:
    --------
    pd.DataFrame with predictions and risk levels
    """
    # Ensure correct feature order
    patients_df = patients_df[feature_names]

    # Scale features
    patients_scaled = scaler.transform(patients_df)

    # Get ensemble predictions
    all_predictions = []

    # Collect predictions from all models
    for name, model in models.items():
        preds = model.predict_proba(patients_scaled)[:, 1]
        all_predictions.append(preds)

    for name, model in nn_models.items():
        preds = model.predict(patients_scaled, verbose=0).flatten()
        all_predictions.append(preds)

    # Apply softmax weights
    all_predictions = np.array(all_predictions).T  # Shape: (n_patients, n_models)
    softmax_weights = np.array(ensemble_config['softmax_weights'])
    ensemble_probs = np.average(all_predictions, axis=1, weights=softmax_weights)

    # Create results DataFrame
    results_df = patients_df.copy()
    results_df['readmission_probability'] = ensemble_probs

    # Risk stratification
    results_df['risk_level'] = pd.cut(
        ensemble_probs,
        bins=[0, 0.35, 0.60, 1.0],
        labels=['LOW', 'MEDIUM', 'HIGH']
    )

    # Intervention costs and potential savings
    results_df['intervention_cost'] = results_df['risk_level'].map({
        'LOW': 0,
        'MEDIUM': 200,
        'HIGH': 600
    })

    results_df['cost_avoidance'] = results_df['risk_level'].map({
        'LOW': 0,
        'MEDIUM': 17500,
        'HIGH': 17500
    })

    results_df['net_benefit'] = results_df['cost_avoidance'] - results_df['intervention_cost']

    print(f"\n✓ Predicted for {len(results_df)} patients")
    print(f"\nRisk Distribution:")
    print(results_df['risk_level'].value_counts().sort_index())
    print(f"\nTotal Intervention Cost: ${results_df['intervention_cost'].sum():,.0f}")
    print(f"Total Potential Savings: ${results_df['cost_avoidance'].sum():,.0f}")
    print(f"Total Net Benefit: ${results_df['net_benefit'].sum():,.0f}")

    return results_df

print("✓ Prediction functions created: predict_single_patient(), predict_batch_patients()")



[4/5] Creating prediction functions...
✓ Prediction functions created: predict_single_patient(), predict_batch_patients()


In [12]:
# ============================================================================
# 5. EXAMPLE USAGE & DEMONSTRATION
# ============================================================================

print("\n[5/5] Example usage demonstrations...")

# Example 1: Single patient prediction
print("\n" + "="*80)
print("EXAMPLE 1: SINGLE PATIENT PREDICTION")
print("="*80)

# Create example patient (using median values for demo)
example_patient = {}
for col in feature_names:
    example_patient[col] = df[col].median()

# Modify some values to create high-risk profile
if 'number_inpatient' in feature_names:
    example_patient['number_inpatient'] = 2  # High risk factor
if 'time_in_hospital' in feature_names:
    example_patient['time_in_hospital'] = 8  # Longer stay
if 'num_medications' in feature_names:
    example_patient['num_medications'] = 20  # Many medications

result = predict_single_patient(example_patient, return_all_models=True)

# Example 2: Batch prediction (using test set sample)
print("\n" + "="*80)
print("EXAMPLE 2: BATCH PREDICTION (10 PATIENTS)")
print("="*80)

sample_patients = df.loc[test_indices[:10], feature_names]
batch_results = predict_batch_patients(sample_patients)

print("\nBatch Results Preview:")
print(batch_results[['readmission_probability', 'risk_level', 'intervention_cost', 'net_benefit']].head(10).to_string())

# Save batch results
batch_results.to_csv(PATHS['batch_predictions'], index=False)
print("\n✓ Batch predictions saved: reports/example_batch_predictions.csv")




[5/5] Example usage demonstrations...

EXAMPLE 1: SINGLE PATIENT PREDICTION





🟡 PREDICTION RESULT 🟡
Readmission Probability: 39.0%
Risk Level: MEDIUM
Recommended Action: Enhanced discharge planning

Suggested Interventions:
  1. Enhanced discharge instructions (written + verbal)
  2. Phone call within 7 days
  3. Schedule 14-day follow-up appointment
  4. Medication list review
  5. Patient portal messaging
  6. Community resource referrals

Financial Impact:
  Intervention Cost: $200
  Potential Cost Avoided: $17,500
  Net Benefit: $17,300

Individual Model Predictions:
  XGBoost                   0.101 (10.1%)
  LightGBM                  0.137 (13.7%)
  Random Forest             0.335 (33.5%)
  Gradient Boosting         0.120 (12.0%)
  Stacking Ensemble         0.099 (9.9%)
  Voting Ensemble           0.121 (12.1%)
  Deep NN                   0.875 (87.5%)
  Residual NN               0.861 (86.1%)
  Attention NN              0.824 (82.4%)

EXAMPLE 2: BATCH PREDICTION (10 PATIENTS)


TypeError: Object with dtype category cannot perform the numpy op subtract

In [13]:
# ============================================================================
# 6. CREATE USAGE TEMPLATE
# ============================================================================

print("\n" + "="*80)
print("USAGE TEMPLATE FOR NEW PATIENTS")
print("="*80)

template_code = """
# =============================================================================
# HOW TO USE FOR NEW PATIENTS
# =============================================================================

import pandas as pd
import numpy as np

# Load this inference script
# exec(open('model_inference.py').read())

# ============================================================================
# METHOD 1: Predict for a SINGLE patient
# ============================================================================

# Create patient data dictionary
new_patient = {
    'age_numeric': 65,
    'time_in_hospital': 5,
    'num_lab_procedures': 45,
    'num_procedures': 3,
    'num_medications': 15,
    'number_outpatient': 2,
    'number_emergency': 1,
    'number_inpatient': 0,
    # ... add all required features
}

# Get prediction
result = predict_single_patient(new_patient, return_all_models=True)

# Access results
print(f"Risk: {result['readmission_probability']:.1%}")
print(f"Level: {result['risk_level']}")
print(f"Action: {result['recommended_action']}")

# ============================================================================
# METHOD 2: Predict for MULTIPLE patients (batch)
# ============================================================================

# Load patient data from CSV or database
patients_df = pd.read_csv('new_patients.csv')

# Get predictions
predictions_df = predict_batch_patients(patients_df)

# Save results
predictions_df.to_csv('predictions_output.csv', index=False)

# Filter high-risk patients
high_risk = predictions_df[predictions_df['risk_level'] == 'HIGH']
print(f"High-risk patients: {len(high_risk)}")

# ============================================================================
# METHOD 3: Real-time API integration
# ============================================================================

def api_predict(patient_json):
    '''
    Wrapper for API integration
    '''
    import json
    patient_dict = json.loads(patient_json)
    result = predict_single_patient(patient_dict)
    return json.dumps(result, default=str)

# Example API call
patient_json = '{"age_numeric": 65, "time_in_hospital": 5, ...}'
response = api_predict(patient_json)
"""

print(template_code)

# Save template
with open('prediction_usage_template.py', 'w') as f:
    f.write(template_code)

print("\n✓ Usage template saved: prediction_usage_template.py")




USAGE TEMPLATE FOR NEW PATIENTS

# HOW TO USE FOR NEW PATIENTS

import pandas as pd
import numpy as np

# Load this inference script
# exec(open('model_inference.py').read())

# METHOD 1: Predict for a SINGLE patient

# Create patient data dictionary
new_patient = {
    'age_numeric': 65,
    'time_in_hospital': 5,
    'num_lab_procedures': 45,
    'num_procedures': 3,
    'num_medications': 15,
    'number_outpatient': 2,
    'number_emergency': 1,
    'number_inpatient': 0,
    # ... add all required features
}

# Get prediction
result = predict_single_patient(new_patient, return_all_models=True)

# Access results
print(f"Risk: {result['readmission_probability']:.1%}")
print(f"Level: {result['risk_level']}")
print(f"Action: {result['recommended_action']}")

# METHOD 2: Predict for MULTIPLE patients (batch)

# Load patient data from CSV or database
patients_df = pd.read_csv('new_patients.csv')

# Get predictions
predictions_df = predict_batch_patients(patients_df)

# Save results
pre

In [14]:
# ============================================================================
# FINAL SUMMARY
# ============================================================================

print("\n" + "="*80)
print("✅ COMPLETE! ALL ARTIFACTS GENERATED")
print("="*80)

print("\nGenerated Files:")
print("  📊 reports/feature_importance_aggregated.csv")
print("  📊 reports/all_models_metrics_complete.csv")
print("  📊 reports/example_batch_predictions.csv")
print("  📈 top_features_importance.png")
print("  📝 prediction_usage_template.py")

print("\nKey Functions Available:")
print("  • predict_single_patient(patient_data, return_all_models=False)")
print("  • predict_batch_patients(patients_df)")

print("\nTop 5 Most Important Features:")
for i, row in aggregate_importance.head(5).iterrows():
    print(f"  {i+1}. {row['feature']:30s} (importance: {row['importance']:.4f})")

best_model = metrics_df.iloc[0]
print(f"\nBest Performing Model: {best_model['Model']}")
print(f"  AUC-ROC: {best_model['AUC-ROC']:.4f}")
print(f"  Precision: {best_model['Precision']:.4f}")
print(f"  Recall: {best_model['Recall']:.4f}")
print(f"  F1-Score: {best_model['F1-Score']:.4f}")

print("\n" + "="*80)
print("Ready for production deployment! 🚀")
print("="*80)


✅ COMPLETE! ALL ARTIFACTS GENERATED

Generated Files:
  📊 reports/feature_importance_aggregated.csv
  📊 reports/all_models_metrics_complete.csv
  📊 reports/example_batch_predictions.csv
  📈 top_features_importance.png
  📝 prediction_usage_template.py

Key Functions Available:
  • predict_single_patient(patient_data, return_all_models=False)
  • predict_batch_patients(patients_df)

Top 5 Most Important Features:
  34. num_medications                (importance: 1741.7745)
  45. time_in_hospital               (importance: 1133.8135)
  6. age                            (importance: 894.7908)
  36. number_diagnoses               (importance: 855.5666)
  35. num_procedures                 (importance: 784.5539)

Best Performing Model: Final Ensemble (Softmax)
  AUC-ROC: 0.6624
  Precision: 0.1127
  Recall: 0.9978
  F1-Score: 0.2026

Ready for production deployment! 🚀
