Belfiore Asia, CID:02129867

MSc Advanced Computing, Spring Term, January 2025

# **Acute Kidney Injury (AKI) Neural Network Predictor**

In [6]:
import numpy as np  # add to requirements.txt
import torch    # add to requirements.txt
import torch.nn as nn
from utils import *

# 0. Local dataset locations (relative paths: training set first, test set second)
train_data_path, test_data_path = 'data/training.csv', 'data/test.csv'

# Optional GPU availability check for macOS (MPS backend), kept disabled:
# if torch.backends.mps.is_available():
#     mps_device = torch.device("mps")
#     x = torch.ones(1, device=mps_device)
#     print (x)
# else:
#     print ("MPS device not found.")

In [None]:
# Define Model Architecture
class AkiPred(nn.Module):
    """
        AkiPred Model: fully connected network producing one sigmoid score per sample.

        layer stack:
            input layer    (input_size  -> hidden_size), ReLU
            hidden layer 1 (hidden_size -> hidden_size), ReLU
            dropout        (p = 0.1)
            hidden layer 2 (hidden_size -> hidden_size), ReLU
            output layer   (hidden_size -> 1), Sigmoid
    """

    def __init__(self, input_size, hidden_size):
        """
            Build the layers of the network.
            inputs:
                input_size: number of features in the input data
                hidden_size: number of neurons in each hidden layer
        """
        super().__init__()

        # keep the dimensions around for later inspection
        self.input_size, self.hidden_size = input_size, hidden_size

        # NOTE: sub-modules are created in the same order as before so that
        # seeded initialisation and print(model) stay unchanged.

        # projection from the raw features to the hidden width
        self.input_layer = nn.Linear(in_features=input_size, out_features=hidden_size)

        # two hidden layers of equal width with a dropout layer in between
        self.hidden1 = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        self.dropout = nn.Dropout(p=0.1)
        self.hidden2 = nn.Linear(in_features=hidden_size, out_features=hidden_size)

        # single output unit
        self.output_layer = nn.Linear(in_features=hidden_size, out_features=1)

        # activations: ReLU after the input/hidden layers, Sigmoid on the output
        # self.prelu = nn.PReLU(1)  # alternative for hidden layers (disabled)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """
            Forward pass of the AkiPred model.
            inputs:
                x: input tensor whose last dimension has input_size features
            output:
                tensor with a last dimension of size 1, values squashed to [0, 1] by the sigmoid
        """
        hidden = self.relu(self.input_layer(x))
        hidden = self.relu(self.hidden1(hidden))
        hidden = self.dropout(hidden)  # only active in training mode
        hidden = self.relu(self.hidden2(hidden))
        logits = self.output_layer(hidden)
        return self.sigmoid(logits)

    # Optional Xavier initialisation hook (disabled):
    # def xavier_init(self, m):
    #     if type(m) == nn.Linear:
    #         torch.nn.init.xavier_uniform_(m.weight)


# Save Model after training is complete
def save_model(model, model_path='aki_predictor/aki_predictor.pth'):
    """
        Save AkiPred Model

        The whole model object is serialised with torch.save (not only its
        state_dict). Failures are reported on stdout instead of being raised,
        so a failed save does not abort the caller.

        input:
            model: AkiPred model (any object torch.save can serialise)
            model_path: destination file; defaults to 'aki_predictor/aki_predictor.pth'
        output: -
    """
    import os  # local import: the notebook's import cell does not provide os

    try:
        # Create the destination folder first so a fresh checkout can save.
        if isinstance(model_path, (str, os.PathLike)):
            parent_dir = os.path.dirname(os.fspath(model_path))
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)

        torch.save(model, model_path)
        print("Saved PyTorch Model State to", model_path)
    except Exception as err:
        # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # still propagate, and report the actual cause of the failure.
        print(f"An error occurred while saving the model to {model_path}: {err}")


# Model Training Pipeline 
def train_model(model, train_data, epochs_num=100, batch_size=250, learning_rate=0.001, log_every=50):
    """
        Pipeline to Train AkiPred Model

        Optimises the model in place with Adam and a mean-squared-error loss.

        input:
            model: AkiPred model
            train_data: training data; each item must unpack as (sample, x, y),
                        where x is fed to the model and y is the target
                        (per the original notes: age, sex, blood tests creatinine
                        results, time elapsed between tests)
            epochs_num: number of passes over train_data (default 100)
            batch_size: mini-batch size for the DataLoader (default 250)
            learning_rate: Adam learning rate (default 0.001)
            log_every: print the epoch loss every this many epochs; 0/None disables printing
        output:
            list with one float per epoch: the sample-weighted mean training loss of that epoch
        raises:
            ValueError: if an epoch yields no batches (empty training data)
    """
    # set model parameters
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  # Adam Optimizer
    model_loss = nn.MSELoss()  # Mean Squared Error Loss
    # model_loss = nn.BCELoss()  # Binary Cross Entropy Loss

    dataloader_train = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

    model_train_loss = []  # mean training loss of each epoch
    for epoch in range(epochs_num):
        model.train()
        loss_sum, samples_seen = 0.0, 0

        for _sample, inputs, y in dataloader_train:
            outputs = model(inputs)
            # match the model output shape [batch_size, 1] and its dtype
            target = y.unsqueeze(1).to(outputs.dtype)
            loss = model_loss(outputs, target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # MSELoss averages over the batch, so weight by batch length to
            # obtain an exact per-sample mean over the whole epoch.
            batch_len = target.shape[0]
            loss_sum += loss.item() * batch_len
            samples_seen += batch_len

        if samples_seen == 0:
            raise ValueError("train_data produced no batches; cannot train on an empty dataset")

        epoch_loss = loss_sum / samples_seen
        model_train_loss.append(epoch_loss)

        if log_every and (epoch + 1) % log_every == 0:
            print(f'Epoch {epoch+1}/{epochs_num}: --> Loss = {epoch_loss}')

    # save_model(model)
    return model_train_loss


In [None]:
# Model Training Pipeline 
def train_model(model, train_data, epochs_num=100, batch_size=250, learning_rate=0.001, log_every=50):
    """
        Pipeline to Train AkiPred Model

        Optimises the model in place with Adam and a mean-squared-error loss.

        NOTE(review): this cell re-declares train_model, which an earlier cell
        already defines; after "Run All" this later definition is the one in
        effect. Keep a single definition.

        input:
            model: AkiPred model
            train_data: training data; each item must unpack as (sample, x, y),
                        where x is fed to the model and y is the target
            epochs_num: number of passes over train_data (default 100)
            batch_size: mini-batch size for the DataLoader (default 250)
            learning_rate: Adam learning rate (default 0.001)
            log_every: print the epoch loss every this many epochs; 0/None disables printing
        output:
            list with one float per epoch: the sample-weighted mean training loss of that epoch
        raises:
            ValueError: if an epoch yields no batches (empty training data)
    """
    # set model parameters
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  # Adam Optimizer
    model_loss = nn.MSELoss()  # Mean Squared Error Loss
    # model_loss = nn.BCELoss()  # Binary Cross Entropy Loss

    dataloader_train = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

    model_train_loss = []  # mean training loss of each epoch
    for epoch in range(epochs_num):
        model.train()
        loss_sum, samples_seen = 0.0, 0

        for _sample, inputs, y in dataloader_train:
            outputs = model(inputs)
            # match the model output shape [batch_size, 1] and its dtype
            target = y.unsqueeze(1).to(outputs.dtype)
            loss = model_loss(outputs, target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # MSELoss averages over the batch, so weight by batch length to
            # obtain an exact per-sample mean over the whole epoch.
            batch_len = target.shape[0]
            loss_sum += loss.item() * batch_len
            samples_seen += batch_len

        if samples_seen == 0:
            raise ValueError("train_data produced no batches; cannot train on an empty dataset")

        epoch_loss = loss_sum / samples_seen
        model_train_loss.append(epoch_loss)

        if log_every and (epoch + 1) % log_every == 0:
            print(f'Epoch {epoch+1}/{epochs_num}: --> Loss = {epoch_loss}')

    # save_model(model)
    return model_train_loss


In [None]:
# 1. Load Training and Testing Datasets
# Use the path variables defined in the setup cell rather than repeating the
# literals, so the dataset location is configured in one place only.
train_data = preprocess_data(train_data_path)
test_data = format_data(test_data_path)


Unnamed: 0,age,sex,aki,creatinine_date_0,creatinine_result_0,creatinine_date_1,creatinine_result_1,creatinine_date_2,creatinine_result_2,creatinine_date_3,...,creatinine_date_4,creatinine_result_4,creatinine_date_5,creatinine_result_5,creatinine_date_6,creatinine_result_6,creatinine_date_7,creatinine_result_7,creatinine_date_8,creatinine_result_8
0,35,m,0,2023-11-10 19:14:00,105.61,2023-11-12 09:14:00,106.07,2023-11-12 11:50:00,98.76,,...,,,,,,,,,,
1,25,m,0,2023-06-12 19:54:00,174.11,2023-06-13 14:27:00,210.22,2023-06-16 15:55:00,210.52,,...,,,,,,,,,,
2,30,m,0,2023-04-17 19:41:00,108.12,2023-04-19 09:58:00,104.41,,,,...,,,,,,,,,,
3,21,m,0,2023-02-19 18:05:00,206.12,2023-05-10 12:19:00,214.51,2023-05-10 14:29:00,224.88,2023-05-11 09:20:00,...,,,,,,,,,,
4,66,m,0,2023-04-08 14:40:00,111.43,2023-04-10 13:42:00,87.20,2023-06-17 11:32:00,80.58,2023-06-17 15:50:00,...,2023-06-19 16:03:00,112.88,2023-06-21 09:45:00,117.44,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999995,50,m,0,2023-03-01 13:15:00,78.99,2023-03-03 09:13:00,72.17,2023-03-05 14:45:00,92.08,2023-10-06 12:21:00,...,2023-10-06 12:29:00,73.26,,,,,,,,
999996,64,m,0,2023-07-02 14:02:00,118.14,2023-07-04 16:04:00,112.45,2023-07-04 16:20:00,124.21,2023-07-06 13:52:00,...,2023-07-08 16:31:00,121.36,,,,,,,,
999997,47,f,0,2023-04-22 17:39:00,87.90,2023-04-24 18:24:00,92.81,2024-01-03 16:33:00,81.39,2024-01-03 17:17:00,...,,,,,,,,,,
999998,62,f,0,2023-07-07 11:43:00,106.79,2023-07-08 17:26:00,104.93,2023-11-14 09:29:00,98.66,2023-11-14 12:46:00,...,2023-11-14 17:20:00,118.14,,,,,,,,


In [None]:
# 2. Initialise Model
# network dimensions: neurons per hidden layer and number of input features
hidden_size = 64
input_size = 10
model = AkiPred(input_size=input_size, hidden_size=hidden_size)
# model.apply(model.xavier_init)

# NOTE(review): sample_num is not defined in any visible cell — presumably it
# comes from `from utils import *`; confirm it exists on a fresh kernel.
print(model, "\nTotal samples:", sample_num)

In [270]:
from importlib import reload
import numpy as np 
import utils
reload(utils)

from utils import *

In [2]:
# 1. Load the training data
train_data_path = 'data/training.csv'

# column names for the frame (presumably sized from the longest CSV row — see utils)
header_len = get_longest_row(train_data_path)
test_dates_columns = get_header(header_len, 'train')

# everything after the first three columns belongs to the creatinine tests
creatinine_columns = test_dates_columns.copy()[3:]

train_data = pd.read_csv(
    train_data_path,
    sep=',',
    names=test_dates_columns,
    skiprows=1,
)

# every second creatinine column (starting with the first) is an exam date:
# convert those to datetime
for date_column in creatinine_columns[::2]:
    train_data[date_column] = pd.to_datetime(train_data[date_column])

train_data

Unnamed: 0,age,sex,aki,creatinine_date_0,creatinine_result_0,creatinine_date_1,creatinine_result_1,creatinine_date_2,creatinine_result_2,creatinine_date_3,...,creatinine_date_4,creatinine_result_4,creatinine_date_5,creatinine_result_5,creatinine_date_6,creatinine_result_6,creatinine_date_7,creatinine_result_7,creatinine_date_8,creatinine_result_8
0,35,m,n,2023-11-10 19:14:00,105.61,2023-11-12 09:14:00,106.07,2023-11-12 11:50:00,98.76,NaT,...,NaT,,NaT,,NaT,,NaT,,NaT,
1,25,m,n,2023-06-12 19:54:00,174.11,2023-06-13 14:27:00,210.22,2023-06-16 15:55:00,210.52,NaT,...,NaT,,NaT,,NaT,,NaT,,NaT,
2,30,m,n,2023-04-17 19:41:00,108.12,2023-04-19 09:58:00,104.41,NaT,,NaT,...,NaT,,NaT,,NaT,,NaT,,NaT,
3,21,m,n,2023-02-19 18:05:00,206.12,2023-05-10 12:19:00,214.51,2023-05-10 14:29:00,224.88,2023-05-11 09:20:00,...,NaT,,NaT,,NaT,,NaT,,NaT,
4,66,m,n,2023-04-08 14:40:00,111.43,2023-04-10 13:42:00,87.20,2023-06-17 11:32:00,80.58,2023-06-17 15:50:00,...,2023-06-19 16:03:00,112.88,2023-06-21 09:45:00,117.44,NaT,,NaT,,NaT,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999995,50,m,n,2023-03-01 13:15:00,78.99,2023-03-03 09:13:00,72.17,2023-03-05 14:45:00,92.08,2023-10-06 12:21:00,...,2023-10-06 12:29:00,73.26,NaT,,NaT,,NaT,,NaT,
999996,64,m,n,2023-07-02 14:02:00,118.14,2023-07-04 16:04:00,112.45,2023-07-04 16:20:00,124.21,2023-07-06 13:52:00,...,2023-07-08 16:31:00,121.36,NaT,,NaT,,NaT,,NaT,
999997,47,f,n,2023-04-22 17:39:00,87.90,2023-04-24 18:24:00,92.81,2024-01-03 16:33:00,81.39,2024-01-03 17:17:00,...,NaT,,NaT,,NaT,,NaT,,NaT,
999998,62,f,n,2023-07-07 11:43:00,106.79,2023-07-08 17:26:00,104.93,2023-11-14 09:29:00,98.66,2023-11-14 12:46:00,...,2023-11-14 17:20:00,118.14,NaT,,NaT,,NaT,,NaT,


In [315]:
from importlib import reload
import numpy as np 
import utils
reload(utils)

from utils import *

# 5. Compare to NHS algorithm
# pick one random row of the training frame (no seed is set, so the pick varies per run)
patient_index = np.random.randint(len(train_data))
patient_data = train_data.iloc[patient_index]
patient, aki = process_patient_data(patient_data, 'train')

# NHS algorithm result next to the value returned for this patient
nhs_aki = nhs_aki_algo(patient)
print(f"Patient {patient_index} NHS Aki: {nhs_aki} | Patient Aki: {aki}")

# show the patient entries from index 5 onwards together with the aki value
patient[5:], aki


Patient 390314 NHS Aki: 1 | Patient Aki: 1


([2.026237559777692, 79.4], 1)