In [None]:
import random
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score, roc_curve, precision_recall_curve
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

# Pick the compute device: the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Number of epochs used by the training loop.
num_epochs = 15

In [None]:
def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds PyTorch (CPU, the current CUDA device and all CUDA devices),
    Python's ``random`` module and NumPy's global generator, then switches
    cuDNN to deterministic mode with benchmarking turned off.

    Args:
        seed: Integer seed handed to each library.
    """
    # PyTorch generators: CPU first, then current GPU, then every GPU.
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(seed)

    # Global generators of the Python standard library and NumPy.
    random.seed(seed)
    np.random.seed(seed)

    # Ask cuDNN for deterministic kernels and disable its benchmark mode.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

In [None]:
# Location of the embedding/outcome files and the mini-batch size, kept in one
# place so they are not repeated as literals below.
DATA_DIR = "../../data"
BATCH_SIZE = 8


def _load_mortality_labels(csv_path):
    """Read an outcomes file and return its ``In-hospital_death`` column.

    Args:
        csv_path: Path to a CSV file containing at least the columns
            ``RecordID`` and ``In-hospital_death``.

    Returns:
        A pandas Series of the ``In-hospital_death`` values, indexed by
        ``RecordID`` and sorted by ascending ``RecordID``.
    """
    outcomes = pd.read_csv(csv_path).sort_values(by=["RecordID"]).set_index("RecordID")
    return outcomes["In-hospital_death"]


# map_location="cpu" lets the files load on machines without a GPU even if the
# tensors were saved from a CUDA device; batches are moved to `device` later,
# inside the training/evaluation loops.
# NOTE(security): torch.load unpickles its input - only load trusted files.
X_train = torch.load(f"{DATA_DIR}/set-a-chronos-embeddings.pt", map_location="cpu")
X_test = torch.load(f"{DATA_DIR}/set-c-chronos-embeddings.pt", map_location="cpu")

outcomes_a = _load_mortality_labels(f"{DATA_DIR}/Outcomes-a.txt")
outcomes_c = _load_mortality_labels(f"{DATA_DIR}/Outcomes-c.txt")

# Embeddings and labels are paired purely by position.
# NOTE(review): this assumes the embedding files were written in ascending
# RecordID order - TODO confirm against the script that produced them.
train_dataset = TensorDataset(X_train, torch.tensor(outcomes_a.values))
test_dataset = TensorDataset(X_test, torch.tensor(outcomes_c.values))

# Shuffle only the training data; keep the test order fixed.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
class ChannelAggregator(nn.Module):
    """Attention-style pooling over channels followed by a binary classifier.

    A single linear layer looks at all channel embeddings at once and emits
    one score per channel. The softmax of those scores is used to form a
    weighted sum of the channel embeddings, which a second linear layer maps
    to one sigmoid-activated output per sample.

    Args:
        input_dim: Size of each channel's embedding vector.
        num_channels: Number of channels per sample.
        dropout_rate: Dropout probability applied to the channel scores and
            to the pooled embedding (default 0.5).
    """

    def __init__(self, input_dim, num_channels, dropout_rate=0.5):
        super().__init__()

        # Collapses every non-batch dimension into one.
        self.flatten = nn.Flatten()

        # Scores all channels jointly from the flattened input.
        self.attention_fc = nn.Linear(input_dim * num_channels, num_channels)
        self.attention_dropout = nn.Dropout(dropout_rate)

        # Maps the pooled embedding to a single output unit.
        self.classification_head = nn.Linear(input_dim, 1)
        self.classification_dropout = nn.Dropout(dropout_rate)

        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Compute one probability per sample.

        Args:
            x: Tensor of shape (batch_size, num_channels, input_dim).

        Returns:
            Tensor of shape (batch_size, 1) with values in [0, 1].
        """
        # (batch, channels * dim) -> (batch, channels); dropout acts on the raw scores.
        channel_scores = self.attention_dropout(self.attention_fc(self.flatten(x)))

        # Normalise across channels and add a trailing axis so the weights
        # broadcast over the embedding dimension: (batch, channels, 1).
        channel_weights = channel_scores.softmax(dim=1).unsqueeze(-1)

        # Weighted sum over the channel axis -> (batch, dim).
        pooled = (channel_weights * x).sum(dim=1)
        pooled = self.classification_dropout(pooled)

        return self.sigmoid(self.classification_head(pooled))
      

In [None]:
# Seed all RNGs before the model is built so weight initialisation, dropout
# masks and batch shuffling are repeatable.
set_seed(42)

# Hyperparameters.
embedding_dim = X_train.shape[-1]
dropout = 0.2
# Derived from the data instead of hard-coded (it was the literal 41): the
# model's first linear layer only accepts inputs whose channel axis matches.
num_channels = X_train.shape[1]
num_epochs = 15

model = ChannelAggregator(embedding_dim, num_channels, dropout).to(device)

# Define loss function and optimizer. The model already applies a sigmoid,
# so plain BCELoss on probabilities is used.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        optimizer.zero_grad()
        outputs = model(batch_X)
        # squeeze(-1) drops only the trailing unit axis. A bare squeeze()
        # would also drop the batch axis when a batch holds one sample,
        # and BCELoss would then reject the input/target size mismatch.
        loss = criterion(outputs.squeeze(-1), batch_y.float())
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Mean of the per-batch losses; max(..., 1) guards an empty loader.
    epoch_loss = total_loss / max(len(train_loader), 1)
    print(f"Epoch {epoch + 1}/{num_epochs} - train loss: {epoch_loss:.4f}")

In [None]:
# Evaluation loop: score the test set and report loss, AuROC and AuPRC.
model.eval()  # Disable dropout for inference
total_loss = 0.0
all_labels = []
all_probs = []

with torch.no_grad():  # No gradients are needed for evaluation
    for batch_X, batch_y in test_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        outputs = model(batch_X)

        # The model ends in a sigmoid, so its outputs are already
        # probabilities. squeeze(-1) removes only the trailing unit axis,
        # so a batch with a single sample keeps its batch dimension.
        probs = outputs.squeeze(-1)

        loss = criterion(probs, batch_y.float())
        total_loss += loss.item()

        # Accumulate flat Python scalars so the metric inputs are 1-D.
        all_probs.extend(probs.cpu().tolist())
        all_labels.extend(batch_y.cpu().tolist())

# Compute metrics
# Mean of the per-batch losses; max(..., 1) guards an empty loader.
average_loss = total_loss / max(len(test_loader), 1)
auroc = roc_auc_score(all_labels, all_probs)
auprc = average_precision_score(all_labels, all_probs)

print(f"Test loss: {average_loss:.4f}")
print(f"Test AuROC: {auroc:.4f}")
print(f"Test AuPRC: {auprc:.4f}")