# Transformer Approach

In [None]:
import torch
import torch.nn as nn

class ICUTransformer(nn.Module):
    """Transformer encoder over a time series, fused with static features.

    The sequence is linearly projected to ``d_model``, summed with a learned
    positional embedding, encoded by a stack of Transformer encoder layers and
    mean-pooled over time. The pooled vector is concatenated with the static
    features and mapped to a single sigmoid probability.

    Args:
        input_dim: Number of features per time step.
        static_dim: Number of static (time-invariant) features.
        d_model: Width of the Transformer representation.
        nhead: Number of attention heads (must divide ``d_model``).
        num_layers: Number of stacked encoder layers.
        ff_dim: Hidden width of each encoder layer's feed-forward sublayer.
        dropout: Dropout probability used in the encoder and the head.
        max_len: Longest sequence the learned positional table supports.
            Shorter sequences use the leading ``seq_len`` positions.
    """

    def __init__(self, input_dim, static_dim, d_model=64, nhead=4, num_layers=2,
                 ff_dim=128, dropout=0.1, max_len=48):
        super().__init__()

        self.max_len = max_len

        # Project input features to d_model dimensions
        self.input_proj = nn.Linear(input_dim, d_model)

        # Positional encoding (learned, not fixed sinusoidal), one row per
        # time step up to max_len
        self.pos_encoder = nn.Parameter(torch.randn(1, max_len, d_model))

        # Transformer encoder layers
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=ff_dim,
            dropout=dropout,
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        # Combine Transformer output with static features
        self.combined_proj = nn.Linear(d_model + static_dim, 64)

        # Classification head
        self.classifier = nn.Sequential(
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(64, 1),
            nn.Sigmoid(),
        )

    def forward(self, x_seq, x_static):
        """Return the predicted probability for each sample.

        Args:
            x_seq: Tensor of shape (batch_size, time_steps, input_dim) with
                ``time_steps <= max_len``.
            x_static: Tensor of shape (batch_size, static_dim).

        Returns:
            Tensor of shape (batch_size, 1) with values in [0, 1].

        Raises:
            ValueError: If ``time_steps`` exceeds the positional table length.
        """
        seq_len = x_seq.size(1)
        table_len = self.pos_encoder.size(1)
        if seq_len > table_len:
            raise ValueError(
                f"sequence length {seq_len} exceeds max_len {table_len}"
            )

        x = self.input_proj(x_seq)                 # → (batch, time, d_model)
        # Slice so that sequences shorter than max_len get exactly one
        # positional row per step instead of relying on broadcasting.
        x = x + self.pos_encoder[:, :seq_len]
        x = self.transformer_encoder(x)            # → (batch, time, d_model)
        x = x.mean(dim=1)                          # global average pooling over time

        x_combined = torch.cat([x, x_static], dim=1)  # concatenate with static features
        x_out = self.combined_proj(x_combined)
        return self.classifier(x_out)              # → output probability





import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, random_split
from sklearn.metrics import accuracy_score, roc_auc_score

# Expected inputs, defined earlier in the notebook:
#   normalized_tensor : [num_samples, time_steps, num_features] time series
#   static_encoded_df : [num_samples, num_static_features] DataFrame
#   labels            : [num_samples] binary targets

X_seq_tensor = torch.tensor(normalized_tensor, dtype=torch.float32)
X_static_tensor = torch.tensor(static_encoded_df.values, dtype=torch.float32)
y_tensor = torch.tensor(labels, dtype=torch.float32).unsqueeze(1)  # [N, 1]

# 80/20 random train/validation split
dataset = TensorDataset(X_seq_tensor, X_static_tensor, y_tensor)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_set, val_set = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
val_loader = DataLoader(val_set, batch_size=32)

# Read the feature widths from the data so the model cannot silently drift
# out of sync with the preprocessing.
model = ICUTransformer(
    input_dim=X_seq_tensor.shape[-1],
    static_dim=X_static_tensor.shape[-1],
)
criterion = nn.BCELoss()  # the model already ends in a Sigmoid
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

num_epochs = 100
for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    epoch_loss = 0

    for x_seq, x_static, y in train_loader:
        optimizer.zero_grad()
        outputs = model(x_seq, x_static)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    avg_loss = epoch_loss / len(train_loader)

    # ---- validation pass ----
    model.eval()
    target_batches = []
    prob_batches = []
    with torch.no_grad():
        for x_seq, x_static, y in val_loader:
            prob_batches.append(model(x_seq, x_static))
            target_batches.append(y)

    # Flatten [N, 1] batches into 1-D arrays, the shape sklearn metrics expect.
    y_true = torch.cat(target_batches).reshape(-1).cpu().numpy()
    y_pred = torch.cat(prob_batches).reshape(-1).cpu().numpy()

    y_pred_label = (y_pred > 0.5).astype(int)
    val_acc = accuracy_score(y_true, y_pred_label)
    val_auc = roc_auc_score(y_true, y_pred)

    print(f"Epoch {epoch+1}/{num_epochs} — Loss: {avg_loss:.4f}, Val Acc: {val_acc:.4f}, ROC-AUC: {val_auc:.4f}")