In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

class SepsisDataset(Dataset):
    def __init__(self, dataframe, sequence_length=24, train=True, scaler=None):
        self.dataframe = dataframe
        self.sequence_length = sequence_length
        self.train = train
        
        # Separate features and labels
        self.features = dataframe.drop(['Patient_Id', 'SepsisLabel'], axis=1)
        self.labels = dataframe['SepsisLabel']
        
        # Scale features
        if train:
            self.scaler = StandardScaler()
            self.scaled_features = self.scaler.fit_transform(self.features)
        else:
            self.scaler = scaler
            self.scaled_features = self.scaler.transform(self.features)
        
        # Group by patient and create sequences
        self.sequences = []
        self.targets = []
        
        for patient_id in dataframe['Patient_Id'].unique():
            patient_data = dataframe[dataframe['Patient_Id'] == patient_id]
            patient_features = self.scaled_features[dataframe['Patient_Id'] == patient_id]
            patient_labels = patient_data['SepsisLabel'].values
            
            # Create sequences
            for i in range(len(patient_data) - sequence_length + 1):
                sequence = patient_features[i:i+sequence_length]
                target = patient_labels[i+sequence_length-1]  # Predict last label in sequence
                self.sequences.append(sequence)
                self.targets.append(target)
    
    def __len__(self):
        return len(self.sequences)
    
    def __getitem__(self, idx):
        return torch.FloatTensor(self.sequences[idx]), torch.LongTensor([self.targets[idx]])

In [3]:
class Attention(nn.Module):
    def __init__(self, hidden_size):
        super(Attention, self).__init__()
        self.hidden_size = hidden_size
        self.attention = nn.Linear(hidden_size, 1)
        
    def forward(self, lstm_output):
        # lstm_output shape: (batch_size, seq_len, hidden_size)
        attention_weights = torch.softmax(self.attention(lstm_output).squeeze(2), dim=1)
        # attention_weights shape: (batch_size, seq_len)
        
        context_vector = torch.bmm(attention_weights.unsqueeze(1), lstm_output).squeeze(1)
        # context_vector shape: (batch_size, hidden_size)
        
        return context_vector, attention_weights

In [4]:
class LSTMAttentionModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout=0.3):
        super(LSTMAttentionModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, 
                           batch_first=True, dropout=dropout, bidirectional=True)
        self.attention = Attention(hidden_size * 2)  # *2 for bidirectional
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_size * 2, num_classes)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        # x shape: (batch_size, sequence_length, input_size)
        batch_size = x.size(0)
        
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers * 2, batch_size, self.hidden_size).to(x.device)  # *2 for bidirectional
        c0 = torch.zeros(self.num_layers * 2, batch_size, self.hidden_size).to(x.device)
        
        # LSTM forward
        lstm_out, _ = self.lstm(x, (h0, c0))
        # lstm_out shape: (batch_size, seq_len, hidden_size * 2)
        
        # Attention
        context_vector, attention_weights = self.attention(lstm_out)
        
        # Dropout and fully connected layer
        out = self.dropout(context_vector)
        out = self.fc(out)
        
        return out, attention_weights

In [5]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=50, device='cpu'):
    model.to(device)
    train_losses = []
    val_losses = []
    val_accuracies = []
    
    for epoch in range(num_epochs):
        # Training
        model.train()
        train_loss = 0.0
        
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device).squeeze()
            
            optimizer.zero_grad()
            output, _ = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
        
        # Validation
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        
        with torch.no_grad():
            for data, target in val_loader:
                data, target = data.to(device), target.to(device).squeeze()
                output, _ = model(data)
                loss = criterion(output, target)
                val_loss += loss.item()
                
                pred = output.argmax(dim=1)
                correct += (pred == target).sum().item()
                total += target.size(0)
        
        # Calculate metrics
        train_loss_avg = train_loss / len(train_loader)
        val_loss_avg = val_loss / len(val_loader)
        val_accuracy = correct / total
        
        train_losses.append(train_loss_avg)
        val_losses.append(val_loss_avg)
        val_accuracies.append(val_accuracy)
        
        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss_avg:.4f}, '
              f'Val Loss: {val_loss_avg:.4f}, Val Acc: {val_accuracy:.4f}')
    
    return train_losses, val_losses, val_accuracies

In [6]:
def main():
    # Load your data
    df = pd.read_csv("/Users/farhat/Documents/Project/ProcessedData/fullData.csv")
    
    # Split data (you might want to do patient-wise split)
    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['SepsisLabel'])
    
    # Create datasets
    train_dataset = SepsisDataset(train_df, sequence_length=24, train=True)
    test_dataset = SepsisDataset(test_df, sequence_length=24, train=False, scaler=train_dataset.scaler)
    
    # Create data loaders
    batch_size = 32
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    # Model parameters
    input_size = train_dataset.scaled_features.shape[1]  # Number of features
    hidden_size = 64
    num_layers = 2
    num_classes = 2  # Binary classification
    
    # Initialize model
    model = LSTMAttentionModel(input_size, hidden_size, num_layers, num_classes)
    
    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
    
    # Train model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    
    train_losses, val_losses, val_accuracies = train_model(
        model, train_loader, test_loader, criterion, optimizer, 
        num_epochs=50, device=device
    )
    
    # Evaluate model
    model.eval()
    all_preds = []
    all_targets = []
    all_attention_weights = []
    
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device).squeeze()
            output, attention_weights = model(data)
            pred = output.argmax(dim=1)
            
            all_preds.extend(pred.cpu().numpy())
            all_targets.extend(target.cpu().numpy())
            all_attention_weights.extend(attention_weights.cpu().numpy())
    
    # Print classification report
    print("\nClassification Report:")
    print(classification_report(all_targets, all_preds))
    
    print("\nConfusion Matrix:")
    print(confusion_matrix(all_targets, all_preds))
    
    # Plot training history
    plt.figure(figsize=(12, 4))
    
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    
    plt.subplot(1, 2, 2)
    plt.plot(val_accuracies, label='Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    
    plt.tight_layout()
    plt.show()
    
    return model, all_attention_weights

if __name__ == "__main__":
    model, attention_weights = main()

Using device: cpu
Epoch 1/50, Train Loss: nan, Val Loss: nan, Val Acc: 0.9614
Epoch 2/50, Train Loss: nan, Val Loss: nan, Val Acc: 0.9614
Epoch 3/50, Train Loss: nan, Val Loss: nan, Val Acc: 0.9614
Epoch 4/50, Train Loss: nan, Val Loss: nan, Val Acc: 0.9614
Epoch 5/50, Train Loss: nan, Val Loss: nan, Val Acc: 0.9614
Epoch 6/50, Train Loss: nan, Val Loss: nan, Val Acc: 0.9614
Epoch 7/50, Train Loss: nan, Val Loss: nan, Val Acc: 0.9614
Epoch 8/50, Train Loss: nan, Val Loss: nan, Val Acc: 0.9614
Epoch 9/50, Train Loss: nan, Val Loss: nan, Val Acc: 0.9614
Epoch 10/50, Train Loss: nan, Val Loss: nan, Val Acc: 0.9614
Epoch 11/50, Train Loss: nan, Val Loss: nan, Val Acc: 0.9614
Epoch 12/50, Train Loss: nan, Val Loss: nan, Val Acc: 0.9614
Epoch 13/50, Train Loss: nan, Val Loss: nan, Val Acc: 0.9614
Epoch 14/50, Train Loss: nan, Val Loss: nan, Val Acc: 0.9614
Epoch 15/50, Train Loss: nan, Val Loss: nan, Val Acc: 0.9614
Epoch 16/50, Train Loss: nan, Val Loss: nan, Val Acc: 0.9614
Epoch 17/50, Tr

KeyboardInterrupt: 

In [8]:
def visualize_attention(attention_weights, sequence_data, feature_names):
    """
    Visualize attention weights for a specific sample
    """
    sample_idx = 0  # Choose a sample to visualize
    attn_weights = attention_weights[sample_idx]
    sequence = sequence_data[sample_idx]
    
    plt.figure(figsize=(15, 8))
    
    # Plot attention weights
    plt.subplot(2, 1, 1)
    plt.bar(range(len(attn_weights)), attn_weights)
    plt.title('Attention Weights Over Time Steps')
    plt.xlabel('Time Step')
    plt.ylabel('Attention Weight')
    
    # Plot feature importance (average attention per feature)
    plt.subplot(2, 1, 2)
    feature_importance = np.mean(sequence * attn_weights.reshape(-1, 1), axis=0)
    plt.bar(range(len(feature_importance)), feature_importance)
    plt.title('Feature Importance (Weighted by Attention)')
    plt.xlabel('Feature Index')
    plt.ylabel('Importance')
    plt.xticks(range(len(feature_names)), feature_names, rotation=90)
    
    plt.tight_layout()
    plt.show()

# Usage example:
# feature_names = train_dataset.features.columns.tolist()
# visualize_attention(attention_weights, test_dataset.sequences, feature_names)