In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
# Step 1: Define the LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_layer_size, num_classes):
        super(LSTMModel, self).__init__()
        
        # LSTM layer
        self.lstm = nn.LSTM(input_size, hidden_layer_size, batch_first=True)
        
        # Fully connected layer
        self.fc = nn.Linear(hidden_layer_size, num_classes)
        
    def forward(self, x):
        # Get LSTM outputs
        lstm_out, (hn, cn) = self.lstm(x)
        
        # Use the last time step's output for classification
        out = self.fc(hn[-1])
        
        return out

In [5]:
# Step 2: Data preprocessing and reshaping (make sure your data is preprocessed)
def preprocess_data(df):
    
    # Drop non-numeric or ID columns
    X = data.drop(columns=['SepsisLabel', 'patient_id'])

    # Target variable
    y = data['SepsisLabel']

    # Normalize the features
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    
    # Convert data into sequences (for LSTM input)
    sequence_length = 10  # You can adjust this based on your needs
    X_seq = []
    y_seq = []
    
    for i in range(len(X) - sequence_length):
        X_seq.append(X[i:i+sequence_length])
        y_seq.append(y[i+sequence_length])

    X_seq = np.array(X_seq)
    y_seq = np.array(y_seq)
    
    return X_seq, y_seq

In [19]:
# Step 3: Train and evaluate the LSTM model
def train_evaluate_lstm(X_train, y_train, X_test, y_test, input_size, hidden_layer_size=64, num_epochs=10, batch_size=64,model_name='LSTM_Model'):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Define the model, loss function, and optimizer
    model = LSTMModel(input_size=input_size, hidden_layer_size=hidden_layer_size, num_classes=2).to(device)
    criterion = nn.CrossEntropyLoss()  # For binary classification
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    # Convert to torch tensors
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
    y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
    y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device)
    
    # Training loop
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for i in range(0, len(X_train), batch_size):
            # Get the batch data
            X_batch = X_train_tensor[i:i+batch_size]
            y_batch = y_train_tensor[i:i+batch_size]
            
            # Forward pass
            optimizer.zero_grad()
            output = model(X_batch)
            
            # Compute the loss
            loss = criterion(output, y_batch)
            
            # Backward pass and optimization
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
        
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(X_train):.4f}')
    
    # Evaluate the model on the test set
    model.eval()
    with torch.no_grad():
        output = model(X_test_tensor)
        _, predicted = torch.max(output, 1)
        y_pred = predicted.cpu().numpy()
        y_true = y_test_tensor.cpu().numpy()
        
        # Calculate metrics
        acc = accuracy_score(y_true, y_pred)
        auc = roc_auc_score(y_true, y_pred)
        tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()

        tpr = tp / (tp + fn) if (tp + fn) else 0
        fpr = fp / (fp + tn) if (fp + tn) else 0
        
        # Print evaluation metrics
        print(f'Accuracy: {acc:.4f}')
        print(f'AUC: {auc:.4f}')
        print(f'Confusion Matrix: \n{tn} {fp}\n{fn} {tp}')

        #Store the results in a dictionary
        results = {
        "model": model_name,
        "Accuracy": acc,
        "TPR (Recall)": tpr,
        "FPR": fpr,
        "AUC": auc
    }
    return results

In [11]:
#Preprocess the data
data=pd.read_csv('sepsis_data_cleaned.csv')

In [13]:
X_seq, y_seq = preprocess_data(data)

In [15]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, random_state=42)

In [21]:
input_size = X_train.shape[2]  # Number of features per time step
lstm_imbalanced_results = train_evaluate_lstm(X_train, y_train, X_test, y_test, input_size, model_name='LSTM_Imbalanced')

Epoch 1/10, Loss: 0.0013
Epoch 2/10, Loss: 0.0012
Epoch 3/10, Loss: 0.0012
Epoch 4/10, Loss: 0.0012
Epoch 5/10, Loss: 0.0011
Epoch 6/10, Loss: 0.0011
Epoch 7/10, Loss: 0.0011
Epoch 8/10, Loss: 0.0010
Epoch 9/10, Loss: 0.0010
Epoch 10/10, Loss: 0.0010
Accuracy: 0.9835
AUC: 0.5467
Confusion Matrix: 
91374 126
1414 148


In [23]:
#LSTM with weights

In [25]:
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix
import numpy as np

# Function to calculate class weights
def calculate_class_weights(y_train):
    class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
    class_weight_dict = dict(zip(np.unique(y_train), class_weights))
    return class_weight_dict

# Now modify the LSTM model training to incorporate these class weights
def train_evaluate_lstm_with_weights(X_train, y_train, X_test, y_test, input_size, class_weight_dict, model_name="LSTM with Class Weights"):
    # Define the LSTM model as before
    model = Sequential()
    model.add(LSTM(64, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
    model.add(LSTM(32, activation='relu', return_sequences=False))
    model.add(Dense(1, activation='sigmoid'))  # Binary classification

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Train the model with class weights
    history = model.fit(X_train, y_train, epochs=10, batch_size=64, class_weight=class_weight_dict, verbose=1)

    # Evaluate the model
    y_pred = (model.predict(X_test) > 0.5).astype(int)
    accuracy = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_pred)
    
    # Calculate confusion matrix for TPR, FPR
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    tpr = tp / (tp + fn) if (tp + fn) else 0
    fpr = fp / (fp + tn) if (fp + tn) else 0
    
    # Print evaluation metrics
    print(f"Accuracy: {accuracy:.4f}")
    print(f"AUC: {auc:.4f}")
    print(f"Confusion Matrix: \n{tn} {fp}\n{fn} {tp}")
    
    # Return results as a dictionary
    results = {
        "model": model_name,
        "Accuracy": accuracy,
        "TPR (Recall)": tpr,
        "FPR": fpr,
        "AUC": auc
    }
    
    return results

In [27]:
class_weight_dict = calculate_class_weights(y_train)

# Train and evaluate with class weights
lstm_with_weights_results = train_evaluate_lstm_with_weights(X_train, y_train, X_test, y_test, input_size, 
                                                             class_weight_dict, model_name="LSTM with Class Weights")

  super().__init__(**kwargs)


Epoch 1/10
[1m5817/5817[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 4ms/step - accuracy: 0.7624 - loss: 0.6062
Epoch 2/10
[1m5817/5817[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 4ms/step - accuracy: 0.7690 - loss: 0.5521
Epoch 3/10
[1m5817/5817[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 5ms/step - accuracy: 0.7546 - loss: 0.5501
Epoch 4/10
[1m5817/5817[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 4ms/step - accuracy: 0.7628 - loss: 0.5170
Epoch 5/10
[1m5817/5817[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 5ms/step - accuracy: 0.7699 - loss: 0.4909
Epoch 6/10
[1m5817/5817[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 5ms/step - accuracy: 0.7809 - loss: 0.4616
Epoch 7/10
[1m5817/5817[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 4ms/step - accuracy: 0.7883 - loss: 0.4440
Epoch 8/10
[1m5817/5817[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 5ms/step - accuracy: 0.8014 - loss: 0.4150
Epoch 9/10
[1m5

In [29]:
# Store the results in a list or DataFrame
results = []
results.append(lstm_imbalanced_results)
results.append(lstm_with_weights_results)

In [31]:
# Convert to DataFrame for better visualization or saving
import pandas as pd
results_df = pd.DataFrame(results)

In [33]:
results_df

Unnamed: 0,model,Accuracy,TPR (Recall),FPR,AUC
0,LSTM_Imbalanced,0.983452,0.09475,0.001377,0.546687
1,LSTM with Class Weights,0.760149,0.804738,0.240612,0.782063


In [35]:
results_df.to_csv("lstm_results.csv", index=False)