# Model B: Vital Sign Forecaster

**Purpose**: Predict the next value of each vital sign from the current reading. Predictions are fed back in as inputs (autoregressively) to drive the 5-second simulation.

**Output**: `vital_forecaster.pkl`

**Dataset**: Your cleaned `merged_sepsis_data.csv`

---

## Step 1: Install Packages

In [None]:
!pip install pandas numpy scikit-learn xgboost matplotlib --quiet
print("✅ Packages installed!")

## Step 2: Import Libraries

In [None]:
import pandas as pd
import numpy as np
import pickle
import os
from datetime import datetime

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.multioutput import MultiOutputRegressor
from xgboost import XGBRegressor

import matplotlib.pyplot as plt
import warnings
# Silence every warning category for the rest of the session so that cell
# output stays compact.
warnings.filterwarnings(action='ignore')

print("✅ Libraries imported!")

## Step 3: Load Dataset

In [None]:
# Relative path to the cleaned, merged dataset (one directory above the
# current working directory).
DATA_PATH = '../merged_sepsis_data.csv'

print(f"Loading: {DATA_PATH}")
df = pd.read_csv(DATA_PATH)

# Report the size of the loaded table and how many distinct patients it holds.
patient_count = df['Patient_ID'].nunique()
print(f"\n✅ Loaded: {len(df):,} rows × {len(df.columns)} cols")
print(f"   Patients: {patient_count:,}")

## Step 4: Create Target Variables (Next Values)

In [None]:
# Order rows by patient, then by ICULOS, so that a one-row shift inside a
# patient's group yields that patient's following observation.
df = df.sort_values(by=['Patient_ID', 'ICULOS']).reset_index(drop=True)

# The target for each vital is the same patient's value on the next row.
# Shifting within the Patient_ID groups keeps one patient's readings from
# being used as another patient's "next" value.
forecast_vitals = ['HR', 'Resp', 'Temp', 'SBP', 'O2Sat']
next_values = df.groupby('Patient_ID')[forecast_vitals].shift(-1)
for vital in forecast_vitals:
    df[f'{vital}_next'] = next_values[vital]

# A NaN target means there is no usable next reading (e.g. the last row of
# each patient); those rows cannot serve as training examples.
next_columns = [f'{vital}_next' for vital in forecast_vitals]
df = df.dropna(subset=next_columns)

print("✅ Created target columns (shifted)")
print(f"   Remaining rows: {len(df):,}")

## Step 5: Feature Engineering

In [None]:
# Derive ShockIndex (HR / SBP) only when the dataset does not already supply it.
if 'ShockIndex' not in df.columns:
    # A zero SBP is turned into NaN first so the division yields NaN rather
    # than infinity; undefined ratios are then replaced by 0.67 and the result
    # is bounded to the range [0, 5].
    nonzero_sbp = df['SBP'].replace(0, np.nan)
    shock_index = df['HR'].div(nonzero_sbp)
    df['ShockIndex'] = shock_index.fillna(0.67).clip(lower=0, upper=5)
    print("✅ Added ShockIndex")

print(f"Available columns: {df.columns[:15].tolist()}...")

## Step 6: Prepare Training Data

In [None]:
# Model inputs: values taken from the current observation.
# The order matters: the autoregressive forecast builds its feature vector
# in exactly this order.
FEATURE_COLUMNS = [
    'ICULOS', 'HR', 'Resp', 'Temp', 'SBP',
    'DBP', 'O2Sat', 'MAP', 'ShockIndex',
]

# Model outputs: the "_next" columns created by shifting within each patient.
TARGET_COLUMNS = [
    'HR_next', 'Resp_next', 'Temp_next', 'SBP_next', 'O2Sat_next',
]

# Work on independent copies so later edits do not touch df.
X = df.loc[:, FEATURE_COLUMNS].copy()
y = df.loc[:, TARGET_COLUMNS].copy()

# Impute missing values with the per-column median.
# For y this is expected to be a no-op, because rows with a missing target
# were already dropped when the targets were created.
feature_medians = X.median()
target_medians = y.median()
X = X.fillna(feature_medians)
y = y.fillna(target_medians)

print(f"Features: {X.shape}")
print(f"Targets: {y.shape}")

In [None]:
# Sample for faster training.
# The draw is seeded so that the subsample, and therefore the trained model
# and its reported metrics, are reproducible from run to run. This matches the
# random_state=42 used by the split and the regressor in this notebook.
SAMPLE_SIZE = 150000
SAMPLE_SEED = 42

if len(X) > SAMPLE_SIZE:
    rng = np.random.default_rng(SAMPLE_SEED)
    # Positional indices, drawn without replacement, applied to X and y alike
    # so that features and targets stay row-aligned.
    idx = rng.choice(len(X), size=SAMPLE_SIZE, replace=False)
    X = X.iloc[idx]
    y = y.iloc[idx]
    print(f"✅ Sampled to {len(X):,} rows")

In [None]:
# Train/Test split, grouped by patient.
# A plain row-level split places rows from the same patient in both sets, so
# the test score would partly measure memorisation of that patient's
# trajectory. Splitting on Patient_ID keeps every patient entirely in one set.
from sklearn.model_selection import GroupShuffleSplit

# X keeps df's index labels (also after sampling), so they can be used to look
# up the patient each row belongs to.
patient_groups = df.loc[X.index, 'Patient_ID']

splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_pos, test_pos = next(splitter.split(X, y, groups=patient_groups))

X_train, X_test = X.iloc[train_pos], X.iloc[test_pos]
y_train, y_test = y.iloc[train_pos], y.iloc[test_pos]

# Scale features: fit on the training rows only, then apply the same
# transformation to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"Train: {X_train_scaled.shape}")
print(f"Test: {X_test_scaled.shape}")

## Step 7: Train Multi-Output Model

In [None]:
print("Training Multi-Output XGBoost...")

# Hyperparameters for the single-target regressor that is replicated per target.
xgb_params = {
    'n_estimators': 100,
    'max_depth': 5,
    'learning_rate': 0.1,
    'random_state': 42,
    'n_jobs': -1,       # use all available cores
    'verbosity': 0,     # no training log output
}
base_model = XGBRegressor(**xgb_params)

# MultiOutputRegressor fits one copy of base_model for each target column.
model = MultiOutputRegressor(base_model)
model.fit(X_train_scaled, y_train)

print("✅ Training complete!")

## Step 8: Evaluate

In [None]:
# Predict all targets for the held-out rows.
y_pred = model.predict(X_test_scaled)

print("Performance Metrics:")
print("-" * 40)

# One MAE / R² pair per target. Column k of y_pred corresponds to
# TARGET_COLUMNS[k], so iterating over the transposed predictions walks the
# targets in order.
metrics = {}
for target_name, predicted in zip(TARGET_COLUMNS, y_pred.T):
    actual = y_test[target_name]
    mae = mean_absolute_error(actual, predicted)
    r2 = r2_score(actual, predicted)
    metrics[target_name] = {'MAE': mae, 'R2': r2}
    print(f"{target_name}: MAE={mae:.2f}, R²={r2:.3f}")

## Step 9: Test Autoregressive Prediction

In [None]:
def autoregressive_forecast(
    initial_vitals: dict,
    n_steps: int = 5,
    *,
    forecaster=None,
    feature_scaler=None,
    iculos_step: float = 0.1,
) -> list:
    """
    Generate n_steps of future predictions using an autoregressive loop.
    This is what the backend will use for 5-second simulation!

    Each step builds the feature vector from the current vitals, predicts the
    next HR / Resp / Temp / SBP / O2Sat, and feeds that prediction back in as
    the input of the following step.

    Args:
        initial_vitals: Starting observation. Recognised keys are 'HR',
            'Resp', 'Temp', 'SBP', 'DBP', 'O2Sat' and 'ICULOS'; a missing key
            falls back to a fixed default. The dict is not modified.
        n_steps: Number of forecast steps; values <= 0 yield an empty list.
        forecaster: Object with ``predict(X)`` returning one row of five
            values per input row. Defaults to the notebook-level ``model``.
        feature_scaler: Object with ``transform(X)``. Defaults to the
            notebook-level ``scaler``.
        iculos_step: Amount added to ICULOS after every step.
            NOTE(review): training pairs each row with the patient's next
            row, so the ICULOS spacing seen in training may differ from 0.1;
            confirm the intended value.

    Returns:
        A list of n_steps dicts with keys 'sequence_index' (1-based), 'HR',
        'Resp', 'Temp', 'SBP' and 'O2Sat'.
    """
    # Fall back to the objects trained in this notebook when none are supplied.
    if forecaster is None:
        forecaster = model
    if feature_scaler is None:
        feature_scaler = scaler

    predictions = []
    current = initial_vitals.copy()

    for step in range(n_steps):
        sbp = current.get('SBP', 120)
        dbp = current.get('DBP', 80)
        hr = current.get('HR', 80)

        # Derived inputs are recomputed each step from the current vitals.
        mean_arterial_pressure = (sbp + 2 * dbp) / 3
        if sbp > 0:
            # Bound to [0, 5], matching the clipping applied when the
            # ShockIndex feature is derived in the feature-engineering step.
            shock_index = min(max(hr / sbp, 0.0), 5.0)
        else:
            # Same fallback the training feature uses for an undefined ratio.
            shock_index = 0.67

        # Order must match FEATURE_COLUMNS, the order the scaler and model
        # were fitted with.
        features = [
            current.get('ICULOS', 1),
            hr,
            current.get('Resp', 18),
            current.get('Temp', 37.0),
            sbp,
            dbp,
            current.get('O2Sat', 97),
            mean_arterial_pressure,
            shock_index,
        ]

        # Scale and predict a single row; output order follows TARGET_COLUMNS.
        X_scaled = feature_scaler.transform([features])
        pred = forecaster.predict(X_scaled)[0]

        next_vitals = {
            'sequence_index': step + 1,
            'HR': round(float(pred[0]), 1),
            'Resp': round(float(pred[1]), 1),
            'Temp': round(float(pred[2]), 2),
            'SBP': round(float(pred[3]), 0),
            'O2Sat': round(float(pred[4]), 1),
        }
        predictions.append(next_vitals)

        # AUTOREGRESSIVE: feed the output back as the next input.
        # DBP is not among the predicted targets, so the last known value is
        # carried forward unchanged.
        current = {
            'HR': next_vitals['HR'],
            'Resp': next_vitals['Resp'],
            'Temp': next_vitals['Temp'],
            'SBP': next_vitals['SBP'],
            'DBP': dbp,
            'O2Sat': next_vitals['O2Sat'],
            'ICULOS': current.get('ICULOS', 1) + iculos_step,
        }

    return predictions

# Smoke test: run the forecaster on one hand-written observation.
test_input = dict(HR=105, Resp=22, Temp=38.3, SBP=100, DBP=65, O2Sat=93, ICULOS=10)

print("Initial:", test_input)
print("\n5-Step Forecast:")

# Print a one-line summary for each forecast step.
forecast = autoregressive_forecast(test_input, 5)
for point in forecast:
    print("  Step {sequence_index}: HR={HR}, Temp={Temp}, SBP={SBP}, O2={O2Sat}".format(**point))

## Step 10: Save Model

In [None]:
MODEL_FILENAME = 'vital_forecaster.pkl'

# Bundle the fitted model with its scaler, the column lists it was trained
# with, and metadata about this training run.
model_package = dict(
    model=model,
    scaler=scaler,
    feature_columns=FEATURE_COLUMNS,
    target_columns=TARGET_COLUMNS,
    version='1.0',
    trained_at=datetime.now().isoformat(),
    metrics=metrics,
)

with open(MODEL_FILENAME, 'wb') as model_file:
    pickle.dump(model_package, model_file)

# Size of the written file in kilobytes.
file_size = os.stat(MODEL_FILENAME).st_size / 1024

banner = "=" * 40
print(banner)
print("VITAL FORECASTER SAVED!")
print(banner)
print(f"File: {MODEL_FILENAME}")
print(f"Size: {file_size:.1f} KB")

---
## ✅ Model B Complete!

Both models are now ready:
- `sepsis_model.pkl` - Risk classifier
- `vital_forecaster.pkl` - Time-series forecaster

---