In [1]:
import tsdb
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from data_processor import DataProcessor

# Notebook-wide pandas display settings: show up to 100 rows and never
# truncate the column list when rendering DataFrames.
for _option, _value in (
    ('display.max_rows', 100),
    ('display.max_columns', None),
):
    pd.set_option(_option, _value)

# Load the PhysioNet 2012 dataset through tsdb; later cells index the result
# by keys such as 'set-a' and 'outcomes-a'.
data = tsdb.load('physionet_2012')


2024-12-05 14:24:55 [INFO]: You're using dataset physionet_2012, please cite it properly in your work. You can find its reference information at the below link: 
https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/physionet_2012
2024-12-05 14:24:55 [INFO]: Dataset physionet_2012 has already been downloaded. Processing directly...
2024-12-05 14:24:55 [INFO]: Dataset physionet_2012 has already been cached. Loading from cache directly...
2024-12-05 14:24:55 [INFO]: Loaded successfully!


## Data voorbereiden

In [2]:
# List the tables provided by the loaded dataset dictionary.
print(data.keys())

# Column names for the record identifier and the time index.
# NOTE: these names shadow the builtin `id` and the stdlib module name `time`;
# they are kept because later cells read them.
id, time = "RecordID", "Time"

# Build DataFrames for the 'set-a' measurements, its outcomes and the static
# feature list.
df_a = pd.DataFrame(data['set-a'])
df_outcomes_a = pd.DataFrame(data['outcomes-a']).reset_index()
df_static_features = pd.DataFrame(data['static_features'])

# The first column of the static-features frame is used as the list of
# global (per-record) feature names.
global_features = list(df_static_features.iloc[:, 0])

# Report the frame shape and the number of distinct record ids, then preview.
print(df_a.shape, df_a[id].unique().size)
df_a.head(3)

dict_keys(['set-a', 'set-b', 'set-c', 'outcomes-a', 'outcomes-b', 'outcomes-c', 'static_features'])
(180552, 43) 3997


Parameter,ALP,ALT,AST,Age,Albumin,BUN,Bilirubin,Cholesterol,Creatinine,DiasABP,FiO2,GCS,Gender,Glucose,HCO3,HCT,HR,Height,ICUType,K,Lactate,MAP,MechVent,Mg,NIDiasABP,NIMAP,NISysABP,Na,PaCO2,PaO2,Platelets,RecordID,RespRate,SaO2,SysABP,Temp,Time,TroponinI,TroponinT,Urine,WBC,Weight,pH
0,,,,54.0,,,,,,,,15.0,0.0,,,,75.0,-1.0,4.0,,,,,,61.5,91.665,152.0,,,,,132539,19.0,,,35.35,0,,,480.0,,-1.0,
1,,,,54.0,,,,,,,,,,,,,60.0,-1.0,,,,,,,62.0,87.0,137.0,,,,,132539,18.0,,,,1,,,30.0,,,
2,,,,54.0,,,,,,,,,,,,,62.0,-1.0,,,,,,,52.0,75.67,123.0,,,,,132539,19.0,,,,2,,,170.0,,,


In [3]:
# Wrap the raw 'set-a' frame in the project's DataProcessor (imported from
# data_processor; its implementation is not part of this notebook).
processor = DataProcessor(
    df=df_a, 
    id_col=id, 
    time_col=time,
    global_features=global_features,
    max_wanted_len=5000  # larger than the data needs: the recorded output reports a max of 631
)

# Apply processing steps in this order. The return values are discarded, so
# each call presumably updates the processor's internal state -- confirm in
# data_processor before reordering or re-running individual steps.
processor.replace_global_features_with_nan()
processor.zscore_transformation(cols_exclude=[id, time])
processor.melt_dataframe()
processor.merge_label(df_label=df_outcomes_a, target="In-hospital_death")
processor.tokenizer()
processor.df_to_3dtensor()


# Fetch the results: the default return value (used below as the melted
# DataFrame), the 3-D tensor, and the label tensor.
melted_df = processor.return_()
melted_3dtensor = processor.return_("3dtensor")
label_tensor = processor.return_("label_tensor")

# Print both tensor shapes and the first rows of the melted DataFrame
print(melted_3dtensor.shape)
print(label_tensor.shape)
print(melted_df.head())

max_wanted_len is larger than the number of timestamps in the data: 5000. Data has a max of: 631
torch.Size([3997, 631, 3])
torch.Size([3997])
   RecordID  Time  event     value  In-hospital_death
0    132539     0      0 -0.584118                  0
1    132539     0      1  0.905648                  0
2    132539     0      2 -1.126121                  0
3    132539     0      3 -0.664478                  0
4    132539     0      4 -1.039968                  0


In [4]:
# Spot-check a single row position: print the first tensor entry, the outcomes
# row at the same position, and the stored label, so they can be compared by eye.
index = 3147
print(melted_3dtensor[index, 0, 0].item())
print(df_outcomes_a.iloc[index].to_numpy())
print(label_tensor[index].item())

# Show every melted row that belongs to one example record.
melted_df.query("RecordID==132539")

0.0
[140501      0]
0.0


Unnamed: 0,RecordID,Time,event,value,In-hospital_death
0,132539,0,0,-0.584118,0
1,132539,0,1,0.905648,0
2,132539,0,2,-1.126121,0
3,132539,0,3,-0.664478,0
4,132539,0,4,-1.039968,0
...,...,...,...,...,...
259,132539,47,7,0.149379,0
260,132539,47,8,0.403089,0
261,132539,47,9,0.615305,0
262,132539,47,10,0.510532,0


## Algoritme
We hebben nu `melted_3dtensor` als invoer en `label_tensor` als labels voor het model.

In [5]:
# Cast the inputs to 32-bit floats and the labels to 64-bit integers, then
# print both shapes as a sanity check.
melted_3dtensor = melted_3dtensor.to(torch.float32)
label_tensor = label_tensor.to(torch.int64)
print(melted_3dtensor.shape, label_tensor.shape)

torch.Size([3997, 631, 3]) torch.Size([3997])


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim

class Simple2LayerModel(nn.Module):
    """Two-layer feed-forward classifier over time-averaged inputs.

    The input is averaged over dimension 1 and passed through
    ``Linear -> ReLU -> Linear``.

    ``forward`` returns raw, unnormalized class scores (logits). This is the
    input ``nn.CrossEntropyLoss`` expects, because that loss applies
    log-softmax itself; feeding it softmax outputs squashes the scores twice,
    which bounds the loss away from zero and flattens the gradients. Use
    ``predict_proba`` when class probabilities are needed.

    Args:
        input_dim: Size of the last input dimension (features per time step).
        hidden_dim: Width of the hidden layer.
        output_dim: Number of output classes.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        # Kept as an attribute for backward compatibility; it has no
        # parameters and is only applied in predict_proba, never in forward.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return class logits for ``x``.

        Args:
            x: Tensor whose dimension 1 is averaged away; in this notebook it
                is shaped (batch, time steps, features).

        Returns:
            Tensor of shape (batch, output_dim) holding unnormalized scores.
        """
        x = torch.mean(x, dim=1)  # Aggregate over time steps
        x = self.fc1(x)
        x = self.relu(x)
        return self.fc2(x)

    def predict_proba(self, x):
        """Return class probabilities (softmax over the logits) for ``x``."""
        return self.softmax(self.forward(x))


In [10]:
from tqdm.auto import tqdm  # Use tqdm.auto for Jupyter compatibility

# Hyperparameters
input_dim = melted_3dtensor.shape[2]  # Size of the last tensor dimension (features)
hidden_dim = 64                       # Hidden layer size
# Number of distinct label values; assumes the labels are encoded as
# 0..C-1, which is what nn.CrossEntropyLoss requires for class-index targets.
output_dim = len(torch.unique(label_tensor))
learning_rate = 0.001
num_epochs = 20
batch_size = 32

# Create model, loss, and optimizer
model = Simple2LayerModel(input_dim, hidden_dim, output_dim)
# NOTE: nn.CrossEntropyLoss applies log-softmax internally and therefore
# expects unnormalized scores (logits) as its input.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

num_samples = melted_3dtensor.size(0)

# Create a tqdm progress bar for the epochs
epoch_iter = tqdm(range(num_epochs), desc="Training", unit="epoch")

# Training loop
for epoch in epoch_iter:

    model.train()  # Set model to training mode

    # Draw a fresh sample order for this epoch. The data is indexed through
    # the permutation instead of overwriting melted_3dtensor / label_tensor,
    # so the global tensors keep their original row order and re-running
    # this cell starts from the same data.
    permutation = torch.randperm(num_samples)

    epoch_loss = 0.0  # Sum of the per-batch mean losses for this epoch
    num_batches = 0   # Batches actually processed (includes a short last batch)

    for i in range(0, num_samples, batch_size):
        # Mini-batch selection
        batch_indices = permutation[i:i + batch_size]
        batch_data = melted_3dtensor[batch_indices]
        batch_labels = label_tensor[batch_indices]

        # Forward pass
        outputs = model(batch_data)
        loss = criterion(outputs, batch_labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate loss
        epoch_loss += loss.item()
        num_batches += 1

    # Average over the batches that really ran. Dividing by
    # len(data) // batch_size under-counts when the last batch is partial and
    # divides by zero when the dataset is smaller than one batch.
    avg_loss = epoch_loss / max(num_batches, 1)

    # Update the tqdm progress bar with the current epoch and loss
    epoch_iter.set_postfix(loss=f"{avg_loss:.4f}")

# Save the trained model (optional)
torch.save(model.state_dict(), "simple_2layer_model.pth")
print("Model training complete and saved!")


Training:   0%|          | 0/20 [00:00<?, ?epoch/s]

Model training complete and saved!
