# RNN Approach 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the simulated patient table from disk.
df = pd.read_csv("/Users/christinamaher/Desktop/simulated_patient_data.csv")

# Tick labels reused by every plot whose x-axis is the readmission flag.
readmission_tick_labels = ['Not Readmitted', 'Readmitted']

# --- Descriptive plots -------------------------------------------------
# 1. How many patients fall into each outcome class.
plt.figure(figsize=(5, 4))
sns.countplot(data=df, x="readmitted")
plt.title("ICU Readmission Class Breakdown")
plt.xticks([0, 1], readmission_tick_labels)
plt.xlabel("")
plt.ylabel("Number of Patients")
plt.tight_layout()
plt.show()

# 2. + 3. Categorical demographics (gender, then race) split by outcome.
# Each entry is (column name, axis/title label, figure size).
for column, axis_label, fig_size in (
    ("gender", "Gender", (6, 4)),
    ("race", "Race", (8, 4)),
):
    plt.figure(figsize=fig_size)
    sns.countplot(data=df, x=column, hue="readmitted")
    plt.title(f"{axis_label} vs Readmission")
    plt.xlabel(axis_label)
    plt.ylabel("Count")
    plt.legend(title="Readmitted")
    plt.tight_layout()
    plt.show()

# 4. Age distribution per outcome class, with quartile lines drawn inside
#    each violin.
plt.figure(figsize=(6, 4))
sns.violinplot(data=df, x="readmitted", y="age", inner="quartile")
plt.title("Age Distribution by Readmission")
plt.xticks([0, 1], readmission_tick_labels)
plt.xlabel("")
plt.tight_layout()
plt.show()

# 5. Heart-rate trajectory per outcome class (mean line, ±1 std band).
# Columns are taken in the order they appear in the DataFrame.
time_cols = [name for name in df.columns if name.startswith("heart_rate_t")]
time = list(range(len(time_cols)))

# Slice the heart-rate columns once per outcome class.
hr_not_readmitted = df.loc[df["readmitted"] == 0, time_cols]
hr_readmitted = df.loc[df["readmitted"] == 1, time_cols]

mean_hr, std_hr = hr_not_readmitted.mean(), hr_not_readmitted.std()
mean_hr_pos, std_hr_pos = hr_readmitted.mean(), hr_readmitted.std()

plt.figure(figsize=(10, 4))
# Draw each class as a mean line followed by its shaded std band.
for series_label, colour, centre, spread in (
    ("Not Readmitted", 'blue', mean_hr, std_hr),
    ("Readmitted", 'red', mean_hr_pos, std_hr_pos),
):
    plt.plot(time, centre, label=series_label, color=colour)
    plt.fill_between(time, centre - spread, centre + spread, alpha=0.2, color=colour)
plt.title("Mean Heart Rate Trajectory by Readmission")
plt.xlabel("Time (hours)")
plt.ylabel("Heart Rate")
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

n_patients = 1000
n_timepoints = 48  # hourly measurements

# Early-stopping configuration and state used by the training loop.
patience = 5  # stop if no val loss improvement after this many epochs
min_delta = 0.001  # minimum change to qualify as improvement
best_val_loss = float('inf')
epochs_no_improve = 0

# Preprocess
# Encode the categorical demographics as integer codes (in place on df).
df["gender"] = LabelEncoder().fit_transform(df["gender"])
df["race"] = LabelEncoder().fit_transform(df["race"])

static_cols = ["age", "gender", "race"]
# Every column whose name contains "_t" is treated as a time-series column.
time_cols = [col for col in df.columns if "_t" in col]
X = df[static_cols + time_cols].values
y = df["readmitted"].values

# Split BEFORE scaling so that the scaler's mean/variance are estimated from
# the training rows only; fitting on the full matrix would leak test-set
# statistics into training.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Keep X as a scaled matrix (now using training statistics) for any later
# code that reads it.
X = scaler.transform(X)

n_static = len(static_cols)
# Derive the number of per-timepoint features from the data instead of
# hard-coding it: a mismatched constant can make reshape() silently produce
# the wrong number of samples.
n_features, leftover_cols = divmod(len(time_cols), n_timepoints)
if n_features == 0 or leftover_cols:
    raise ValueError(
        f"Expected the number of time-series columns to be a positive "
        f"multiple of {n_timepoints}, got {len(time_cols)}"
    )

# NOTE(review): this reshape assumes the time-series columns are ordered
# timepoint-major (all features for t0, then all features for t1, ...).
# If the CSV is feature-major instead, reshape to
# (-1, n_features, n_timepoints) and transpose the last two axes — confirm
# against the file's column order.
X_train_seq = X_train[:, n_static:].reshape(-1, n_timepoints, n_features)
X_test_seq = X_test[:, n_static:].reshape(-1, n_timepoints, n_features)

# Repeat the static features along the time axis so they can be appended to
# every timestep of the sequence.
static_train = np.repeat(X_train[:, :n_static].reshape(-1, 1, n_static), n_timepoints, axis=1)
static_test = np.repeat(X_test[:, :n_static].reshape(-1, 1, n_static), n_timepoints, axis=1)

# Final model input: (samples, n_timepoints, n_features + n_static).
X_train_final = np.concatenate([X_train_seq, static_train], axis=2)
X_test_final = np.concatenate([X_test_seq, static_test], axis=2)

class ICUReadmissionDataset(Dataset):
    """Tensor-backed dataset pairing each feature array with its label.

    ``X`` is converted to a ``float32`` tensor and ``y`` to a ``long``
    tensor once, at construction time; indexing then slices those tensors.
    """

    def __init__(self, X, y):
        """Store ``X`` and ``y`` as ``float32`` / ``long`` tensors."""
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        """Return the number of labels held by the dataset."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        features = self.X[idx]
        label = self.y[idx]
        return features, label

class RNNClassifier(nn.Module):
    """LSTM sequence classifier that predicts from the final hidden state.

    Args:
        input_size: Number of features per timestep.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout_rate: Dropout probability, applied between stacked LSTM
            layers and to the final hidden state before the linear head.
        num_classes: Number of output logits (defaults to 2).
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout_rate,
                 num_classes=2):
        super().__init__()
        # nn.LSTM only applies dropout *between* stacked layers; passing a
        # non-zero value with a single layer has no effect and makes PyTorch
        # emit a warning, so disable it in that case.
        inter_layer_dropout = dropout_rate if num_layers > 1 else 0.0
        self.rnn = nn.LSTM(input_size, hidden_size, num_layers,
                           batch_first=True, dropout=inter_layer_dropout)
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits for a batch of sequences.

        Args:
            x: Tensor laid out as (batch, seq_len, input_size), since the
                LSTM is built with ``batch_first=True``.

        Returns:
            Tensor of shape (batch, num_classes) holding unnormalised logits.
        """
        # hn stacks the final hidden state of every layer; hn[-1] is the
        # top layer's state.
        _, (hn, _) = self.rnn(x)
        out = self.dropout(hn[-1])
        return self.fc(out)

# Grid Search
# Trains one model per hyper-parameter combination with early stopping and
# keeps the configuration (and weights) with the lowest validation loss.
import copy  # local import: used to snapshot the best weights of each run

param_grid = {
    "hidden_size": [64],
    "num_layers": [2],
    "dropout_rate": [0.4],
    "lr": [0.0005],
    "batch_size": [32]
}
grid = list(ParameterGrid(param_grid))

best_loss = float('inf')
best_model = None
best_params = {}
best_train_losses = []
best_val_losses = []

epochs = 50  # upper bound on epochs per configuration

# The datasets do not depend on the hyper-parameters, so build them once.
train_dataset = ICUReadmissionDataset(X_train_final, y_train)
# NOTE(review): the held-out test split doubles as the validation set here,
# so early stopping and model selection see the same data that is used for
# the final metrics. Consider carving a separate validation split out of the
# training data.
val_dataset = ICUReadmissionDataset(X_test_final, y_test)

for params in grid:
    print(f"\n🔧 Trying: {params}")
    train_loader = DataLoader(train_dataset,
                              batch_size=params["batch_size"], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32)

    model = RNNClassifier(
        input_size=X_train_final.shape[2],
        hidden_size=params["hidden_size"],
        num_layers=params["num_layers"],
        dropout_rate=params["dropout_rate"]
    )

    # Class 1 is weighted 3x relative to class 0 in the loss.
    criterion = nn.CrossEntropyLoss(weight=torch.tensor([1.0, 3.0]))
    optimizer = optim.Adam(model.parameters(), lr=params["lr"])

    train_losses = []
    val_losses = []

    # Reset the early-stopping state for every configuration; otherwise the
    # best loss / stale-epoch counter of a previous run would leak into this
    # one and could stop it prematurely.
    best_val_loss = float('inf')
    epochs_no_improve = 0
    best_state = None  # weights from the epoch with the best validation loss

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        for Xb, yb in train_loader:
            optimizer.zero_grad()
            output = model(Xb)
            loss = criterion(output, yb)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        avg_loss = epoch_loss / len(train_loader)
        train_losses.append(avg_loss)
        print(f"Epoch {epoch+1}, Train Loss: {avg_loss:.4f}")

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for Xb, yb in val_loader:
                out = model(Xb)
                val_loss += criterion(out, yb).item()

        val_loss_avg = val_loss / len(val_loader)
        val_losses.append(val_loss_avg)
        print(f"          Validation Loss: {val_loss_avg:.4f}")

        # Early stopping logic
        if val_loss_avg < best_val_loss - min_delta:
            best_val_loss = val_loss_avg
            epochs_no_improve = 0
            # Snapshot the weights now: by the time patience runs out the
            # live model is several epochs past its best point.
            best_state = copy.deepcopy(model.state_dict())
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f"Early stopping triggered at epoch {epoch+1}")
                break

    # Roll the model back to its best epoch before comparing configurations.
    if best_state is not None:
        model.load_state_dict(best_state)
    model.eval()

    # Rank configurations by their best validation loss, which matches the
    # weights restored above, rather than by the last-epoch loss.
    if best_val_loss < best_loss:
        best_loss = best_val_loss
        best_model = model
        best_params = params
        best_train_losses = train_losses.copy()
        best_val_losses = val_losses.copy()

# Report the winning hyper-parameters.
print("\nBest Params:", best_params)

# Loss curves of the selected configuration, with epochs numbered from 1.
train_epochs = range(1, len(best_train_losses) + 1)
val_epochs = range(1, len(best_val_losses) + 1)

plt.figure(figsize=(6, 4))
plt.plot(train_epochs, best_train_losses, marker='o', label='Train Loss')
plt.plot(val_epochs, best_val_losses, marker='s', label='Validation Loss')
plt.title("Training vs Validation Loss (Best Model)")
plt.xlabel("Epoch")
plt.ylabel("Cross-Entropy Loss")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()

from sklearn.metrics import ConfusionMatrixDisplay, accuracy_score, f1_score
import matplotlib.pyplot as plt

# Get test predictions (computed once and reused by every report below).
# Put the model in eval mode explicitly so dropout is disabled regardless of
# the mode the training loop left it in.
best_model.eval()
y_true, y_pred = [], []
with torch.no_grad():
    for Xb, yb in DataLoader(ICUReadmissionDataset(X_test_final, y_test), batch_size=32):
        # Predicted class = index of the largest logit.
        preds = best_model(Xb).argmax(dim=1)
        y_true.extend(yb.tolist())
        y_pred.extend(preds.tolist())

print("\nClassification Report:")
print(classification_report(y_true, y_pred, digits=3))
print("\nConfusion Matrix:")
print(confusion_matrix(y_true, y_pred))

# Calculate metrics
acc = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)

# Plot confusion matrix
disp = ConfusionMatrixDisplay.from_predictions(
    y_true, y_pred,
    display_labels=["Not Readmitted", "Readmitted"],
    cmap="Blues",
    values_format="d"
)
plt.title(f"Confusion Matrix\nAccuracy: {acc:.3f}, F1 Score: {f1:.3f}")
plt.tight_layout()
plt.show()

# Print metrics
# "\n" replaces the original "\A", an invalid escape sequence that printed a
# literal backslash.
print(f"\nAccuracy: {acc:.3f}")
print(f"F1 Score: {f1:.3f}")