# Question 2

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn

from project_1.config import PROJ_ROOT, PROCESSED_DATA_DIR
from project_1.loading import *
from project_1.dataset import *

[32m2025-03-24 17:03:19.746[0m | [1mINFO    [0m | [36mproject_1.config[0m:[36m<module>[0m:[36m11[0m - [1mPROJ_ROOT path is: /Users/francescobondi/Desktop/stuff/ETH/FS25/ML for Healthcare/project-1-ml4hc[0m


## Data Loading
For basic LSTM, we load the final datasets

In [2]:
set_a, set_b, set_c = load_final_data_without_ICU()
death_a, death_b, death_c = load_outcomes()

Shapes of the datasets:
Set A: (183416, 42) Set B: (183495, 42) Set C: (183711, 42)
Shapes of labels:
Set A: (4000, 2) Set B: (4000, 2) Set C: (4000, 2)


# Attempt 1 - LSTM - Model Implementation (Last State)
This basic implementation takes the last hidden state to be used for prediction

In [3]:
class LSTM_Model(nn.Module):
    def __init__(self, input_size, hidden_size=64, num_layers=2, num_classes=1, dropout=0.3):
        super(LSTM_Model, self).__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,       # 41 features per time step
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout
        )
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # x: (batch_size, seq_len, input_size)
        out, _ = self.lstm(x)           # out: (batch_size, seq_len, hidden_size)
        out = out[:, -1, :]             # Take last time step: (batch_size, hidden_size)
        out = self.fc(out)              # (batch_size, num_classes)
        return out.squeeze()            # (batch_size,) for BCEWithLogitsLoss

# Obtain TensorDatasets from Time Series data

In [4]:
train_dataset = create_dataset_from_timeseries(set_a, death_a["In-hospital_death"])
validation_dataset = create_dataset_from_timeseries(set_b, death_b["In-hospital_death"])
test_dataset = create_dataset_from_timeseries(set_c, death_c["In-hospital_death"])

train_dataset.tensors[0].shape # (batch_size, seq_len, input_size)

torch.Size([4000, 49, 40])

In [5]:
# Convert to DataLoader
from torch.utils.data import DataLoader
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

## Train Loop

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_size = train_dataset.tensors[0].shape[-1]
model = LSTM_Model(input_size=input_size).to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Call the trainig loop (default 10 epochs)
model = train_model_with_validation(model, train_loader, validation_loader, criterion, optimizer, device)

  from .autonotebook import tqdm as notebook_tqdm
                                                                              

Epoch 1/10
  Train Loss: 0.4497 | AUCROC: 0.5604 | AUPRC: 0.1566
  Val   Loss: 0.3547 | AUCROC: 0.7660 | AUPRC: 0.4106



                                                                              

Epoch 2/10
  Train Loss: 0.3358 | AUCROC: 0.7849 | AUPRC: 0.4156
  Val   Loss: 0.3323 | AUCROC: 0.8149 | AUPRC: 0.4619



                                                                              

Epoch 3/10
  Train Loss: 0.3067 | AUCROC: 0.8345 | AUPRC: 0.4951
  Val   Loss: 0.3138 | AUCROC: 0.8346 | AUPRC: 0.4585



                                                                              

Epoch 4/10
  Train Loss: 0.2879 | AUCROC: 0.8594 | AUPRC: 0.5262
  Val   Loss: 0.3149 | AUCROC: 0.8301 | AUPRC: 0.4593



                                                                              

Epoch 5/10
  Train Loss: 0.2695 | AUCROC: 0.8770 | AUPRC: 0.5970
  Val   Loss: 0.3277 | AUCROC: 0.8309 | AUPRC: 0.4490



                                                                              

Epoch 6/10
  Train Loss: 0.2519 | AUCROC: 0.8945 | AUPRC: 0.6322
  Val   Loss: 0.3224 | AUCROC: 0.8280 | AUPRC: 0.4475



                                                                              

Epoch 7/10
  Train Loss: 0.2304 | AUCROC: 0.9111 | AUPRC: 0.7019
  Val   Loss: 0.3425 | AUCROC: 0.8247 | AUPRC: 0.4335



                                                                              

Epoch 8/10
  Train Loss: 0.2229 | AUCROC: 0.9179 | AUPRC: 0.7137
  Val   Loss: 0.3426 | AUCROC: 0.8170 | AUPRC: 0.4170



                                                                              

Epoch 9/10
  Train Loss: 0.2086 | AUCROC: 0.9274 | AUPRC: 0.7494
  Val   Loss: 0.3892 | AUCROC: 0.8181 | AUPRC: 0.4271



                                                                               

Epoch 10/10
  Train Loss: 0.1877 | AUCROC: 0.9429 | AUPRC: 0.7939
  Val   Loss: 0.4143 | AUCROC: 0.8162 | AUPRC: 0.4330





## Evaluation of Model

In [7]:
avg_loss, aucroc, auprc = evaluate_model(model, test_loader, criterion, device)
print(f"Test Loss: {avg_loss:.4f}, AUC-ROC: {aucroc:.4f}, AUC-PRC: {auprc:.4f}")

                                                                        

Evaluation - Loss: 0.4082 - AUCROC: 0.8151 - AUPRC: 0.4571
Test Loss: 0.4082, AUC-ROC: 0.8151, AUC-PRC: 0.4571




# Attempt 2 - LSTM - Model Implementation (Mean Pooling)

In [8]:
class LSTM_Model_Pooling(nn.Module):
    def __init__(self, input_size, hidden_size=64, num_layers=2, num_classes=1, dropout=0.3):
        super(LSTM_Model_Pooling, self).__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,       # 40 features per time step
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout
        )
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # x: (batch_size, seq_len, input_size)
        out, _ = self.lstm(x)           # out: (batch_size, seq_len, hidden_size)
        out = out.mean(dim=1)           # Pooling: (batch_size, hidden_size)   
        out = self.fc(out)              # (batch_size, num_classes)
        return out.squeeze()            # (batch_size,) for BCEWithLogitsLoss

In [9]:
# Use the previous data loaders and train the new model
model_pooling = LSTM_Model_Pooling(input_size=input_size).to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model_pooling.parameters(), lr=0.001)

model_pooling = train_model_with_validation(model_pooling, train_loader, validation_loader, criterion, optimizer, device)

                                                                              

Epoch 1/10
  Train Loss: 0.4303 | AUCROC: 0.5825 | AUPRC: 0.1721
  Val   Loss: 0.3578 | AUCROC: 0.7557 | AUPRC: 0.3545



                                                                              

Epoch 2/10
  Train Loss: 0.3372 | AUCROC: 0.7851 | AUPRC: 0.3899
  Val   Loss: 0.3359 | AUCROC: 0.7962 | AUPRC: 0.3973



                                                                              

Epoch 3/10
  Train Loss: 0.3172 | AUCROC: 0.8206 | AUPRC: 0.4467
  Val   Loss: 0.3271 | AUCROC: 0.8122 | AUPRC: 0.4244



                                                                              

Epoch 4/10
  Train Loss: 0.3038 | AUCROC: 0.8403 | AUPRC: 0.4773
  Val   Loss: 0.3369 | AUCROC: 0.8124 | AUPRC: 0.4237



                                                                              

Epoch 5/10
  Train Loss: 0.2898 | AUCROC: 0.8596 | AUPRC: 0.5134
  Val   Loss: 0.3544 | AUCROC: 0.8060 | AUPRC: 0.4219



                                                                              

Epoch 6/10
  Train Loss: 0.2732 | AUCROC: 0.8784 | AUPRC: 0.5749
  Val   Loss: 0.3342 | AUCROC: 0.8169 | AUPRC: 0.4144



                                                                              

Epoch 7/10
  Train Loss: 0.2632 | AUCROC: 0.8893 | AUPRC: 0.5920
  Val   Loss: 0.3268 | AUCROC: 0.8192 | AUPRC: 0.4318



                                                                              

Epoch 8/10
  Train Loss: 0.2405 | AUCROC: 0.9123 | AUPRC: 0.6384
  Val   Loss: 0.3790 | AUCROC: 0.8030 | AUPRC: 0.4090



                                                                              

Epoch 9/10
  Train Loss: 0.2213 | AUCROC: 0.9270 | AUPRC: 0.6865
  Val   Loss: 0.3855 | AUCROC: 0.8059 | AUPRC: 0.4194



                                                                               

Epoch 10/10
  Train Loss: 0.2070 | AUCROC: 0.9373 | AUPRC: 0.7342
  Val   Loss: 0.4794 | AUCROC: 0.7844 | AUPRC: 0.3979





In [10]:
# Now evaluate the model
avg_loss, aucroc, auprc = evaluate_model(model_pooling, test_loader, criterion, device)
print(f"Test Loss: {avg_loss:.4f}, AUC-ROC: {aucroc:.4f}, AUC-PRC: {auprc:.4f}")

                                                                        

Evaluation - Loss: 0.4467 - AUCROC: 0.7894 - AUPRC: 0.4138
Test Loss: 0.4467, AUC-ROC: 0.7894, AUC-PRC: 0.4138




# Attempt 3 - Bidirectional LSTM - Model Implementation

In [14]:
class LSTM_Model_Bi(nn.Module):
    def __init__(self, input_size, hidden_size=64, num_layers=2, num_classes=1, dropout=0.3):
        super(LSTM_Model_Bi, self).__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,       # 41 features per time step
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
            bidirectional=True
        )
        self.fc = nn.Linear(hidden_size * 2, num_classes) # *2 for bidirectional

    def forward(self, x):
        # x: (batch_size, seq_len, input_size)
        out, _ = self.lstm(x)           # out: (batch_size, seq_len, hidden_size)
        out = out[:, -1, :]             # Take last time step: (batch_size, hidden_size)
        out = self.fc(out)              # (batch_size, num_classes)
        return out.squeeze()            # (batch_size,) for BCEWithLogitsLoss

In [15]:
# Train the model
model_bi = LSTM_Model_Bi(input_size=input_size).to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model_bi.parameters(), lr=0.001)

model_bi = train_model_with_validation(model_bi, train_loader, validation_loader, criterion, optimizer, device)

                                                                              

Epoch 1/10
  Train Loss: 0.4344 | AUCROC: 0.6091 | AUPRC: 0.1905
  Val   Loss: 0.3563 | AUCROC: 0.7748 | AUPRC: 0.4063



                                                                              

Epoch 2/10
  Train Loss: 0.3251 | AUCROC: 0.8053 | AUPRC: 0.4253
  Val   Loss: 0.3240 | AUCROC: 0.8168 | AUPRC: 0.4521



                                                                              

Epoch 3/10
  Train Loss: 0.3040 | AUCROC: 0.8380 | AUPRC: 0.4865
  Val   Loss: 0.3178 | AUCROC: 0.8239 | AUPRC: 0.4602



                                                                              

Epoch 4/10
  Train Loss: 0.2859 | AUCROC: 0.8616 | AUPRC: 0.5321
  Val   Loss: 0.3152 | AUCROC: 0.8315 | AUPRC: 0.4608



                                                                              

Epoch 5/10
  Train Loss: 0.2695 | AUCROC: 0.8804 | AUPRC: 0.5854
  Val   Loss: 0.3305 | AUCROC: 0.8288 | AUPRC: 0.4489



                                                                              

Epoch 6/10
  Train Loss: 0.2503 | AUCROC: 0.8995 | AUPRC: 0.6372
  Val   Loss: 0.3307 | AUCROC: 0.8395 | AUPRC: 0.4628



                                                                              

Epoch 7/10
  Train Loss: 0.2296 | AUCROC: 0.9169 | AUPRC: 0.6813
  Val   Loss: 0.3777 | AUCROC: 0.8178 | AUPRC: 0.4408



                                                                              

Epoch 8/10
  Train Loss: 0.2167 | AUCROC: 0.9259 | AUPRC: 0.7237
  Val   Loss: 0.3786 | AUCROC: 0.8266 | AUPRC: 0.4423



                                                                              

Epoch 9/10
  Train Loss: 0.1980 | AUCROC: 0.9374 | AUPRC: 0.7709
  Val   Loss: 0.3929 | AUCROC: 0.8158 | AUPRC: 0.4355



                                                                               

Epoch 10/10
  Train Loss: 0.1772 | AUCROC: 0.9486 | AUPRC: 0.8204
  Val   Loss: 0.4428 | AUCROC: 0.8114 | AUPRC: 0.4183





In [16]:
# Now evaluate
avg_loss, aucroc, auprc = evaluate_model(model_bi, test_loader, criterion, device)
print(f"Test Loss: {avg_loss:.4f}, AUC-ROC: {aucroc:.4f}, AUC-PRC: {auprc:.4f}")

                                                                        

Evaluation - Loss: 0.4351 - AUCROC: 0.8020 - AUPRC: 0.4309
Test Loss: 0.4351, AUC-ROC: 0.8020, AUC-PRC: 0.4309




# Transformers - Model Implementation

In [63]:
import torch.nn

class TransformerClassifier(nn.Module):
    def __init__(self, input_size, num_classes=1, nhead=4, num_layers=2, dim_feedforward=128, dropout=0.3):
        super().__init__()
        self.input_size = input_size

        # Project input features to model dimension
        self.embedding = nn.Linear(input_size, dim_feedforward)

        # Positional Encoding
        self.pos_encoder = PositionalEncoding(dim_feedforward, dropout)

        # Transformer Encoder
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=dim_feedforward,
            nhead=nhead,
            dim_feedforward=dim_feedforward * 2,
            dropout=dropout,
            batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        # Final classifier
        self.fc = nn.Linear(dim_feedforward, num_classes)

    def forward(self, x):
        # x: (batch, seq_len, input_size)

        if x.dim() == 2:
            x = x.unsqueeze(1) # (batch, 1, input_size)

        x = self.embedding(x)                # (batch, seq_len, d_model)
        #print("After embedding:", x.shape)  # Debug print
        x = self.pos_encoder(x)
        #print("After pos encoding:", x.shape)  # Debug print
        x = self.transformer_encoder(x)      # (batch, seq_len, d_model)
        #print("After transformer encoder:", x.shape)

        x = x.mean(dim=1)                    # mean pooling over time
        #print("After pooling:", x.shape)     # Debug print
        out = self.fc(x).squeeze()           # (batch,)
        #print("After fc:", out.shape)        # Debug print
        return out


class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=500):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)  # (max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * -(torch.log(torch.tensor(10000.0)) / d_model))

        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)  # (1, max_len, d_model)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:, :x.size(1)]
        return self.dropout(x)

In [18]:
# Train the Transformer model
model_transformer = TransformerClassifier(input_size=input_size).to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model_transformer.parameters(), lr=0.001)

model_transformer = train_model_with_validation(model_transformer, train_loader, validation_loader, criterion, optimizer, device)

                                                                              

Epoch 1/10
  Train Loss: 0.3753 | AUCROC: 0.6900 | AUPRC: 0.2777
  Val   Loss: 0.3934 | AUCROC: 0.7895 | AUPRC: 0.3990



                                                                              

Epoch 2/10
  Train Loss: 0.3372 | AUCROC: 0.7825 | AUPRC: 0.3873
  Val   Loss: 0.3414 | AUCROC: 0.8046 | AUPRC: 0.4048



                                                                              

Epoch 3/10
  Train Loss: 0.3256 | AUCROC: 0.8034 | AUPRC: 0.4247
  Val   Loss: 0.3268 | AUCROC: 0.8188 | AUPRC: 0.4287



                                                                              

Epoch 4/10
  Train Loss: 0.3217 | AUCROC: 0.8135 | AUPRC: 0.4283
  Val   Loss: 0.3304 | AUCROC: 0.8255 | AUPRC: 0.4381



                                                                              

Epoch 5/10
  Train Loss: 0.3080 | AUCROC: 0.8324 | AUPRC: 0.4729
  Val   Loss: 0.3290 | AUCROC: 0.8247 | AUPRC: 0.4555



                                                                              

Epoch 6/10
  Train Loss: 0.3043 | AUCROC: 0.8369 | AUPRC: 0.4933
  Val   Loss: 0.3434 | AUCROC: 0.7989 | AUPRC: 0.4113



                                                                              

Epoch 7/10
  Train Loss: 0.2927 | AUCROC: 0.8530 | AUPRC: 0.5238
  Val   Loss: 0.3677 | AUCROC: 0.8188 | AUPRC: 0.4131



                                                                              

Epoch 8/10
  Train Loss: 0.2880 | AUCROC: 0.8574 | AUPRC: 0.5443
  Val   Loss: 0.3782 | AUCROC: 0.8138 | AUPRC: 0.3918



                                                                              

Epoch 9/10
  Train Loss: 0.2808 | AUCROC: 0.8658 | AUPRC: 0.5638
  Val   Loss: 0.3890 | AUCROC: 0.8075 | AUPRC: 0.3864



                                                                               

Epoch 10/10
  Train Loss: 0.2777 | AUCROC: 0.8709 | AUPRC: 0.5644
  Val   Loss: 0.4118 | AUCROC: 0.8173 | AUPRC: 0.4136





In [19]:
# Evaluate the model
avg_loss, aucroc, auprc = evaluate_model(model_transformer, test_loader, criterion, device)
print(f"Test Loss: {avg_loss:.4f}, AUC-ROC: {aucroc:.4f}, AUC-PRC: {auprc:.4f}")

                                                                        

Evaluation - Loss: 0.3697 - AUCROC: 0.8141 - AUPRC: 0.4395
Test Loss: 0.3697, AUC-ROC: 0.8141, AUC-PRC: 0.4395


# Q2.3 - Tokenizing

In [21]:
# For this part, we need to load the initial data
set_a_initial, set_b_initial, set_c_initial = load_basic_data()
set_a_initial.head()

Shapes of the datasets:
Set A: (183416, 43) Set B: (183495, 43) Set C: (183711, 43)


Unnamed: 0,RecordID,Time,Age,BUN,Creatinine,GCS,Gender,Glucose,HCO3,HCT,...,PaCO2,PaO2,pH,DiasABP,MAP,SaO2,SysABP,Lactate,Cholesterol,TroponinI
0,132539.0,2025-03-10 00:00:00,54.0,,,,0.0,,,,...,,,,,,,,,,
1,132539.0,2025-03-10 01:00:00,,,,15.0,,,,,...,,,,,,,,,,
2,132539.0,2025-03-10 02:00:00,,,,,,,,,...,,,,,,,,,,
3,132539.0,2025-03-10 03:00:00,,,,,,,,,...,,,,,,,,,,
4,132539.0,2025-03-10 04:00:00,,,,15.0,,,,33.7,...,,,,,,,,,,


## Create the TZV Dataframe (following Horn et al.)

In [44]:
from sklearn.preprocessing import MinMaxScaler

def build_TZV_dataframe(original_df, label_df, base_time="2025-03-10 00:00:00", duration_hours=48):
    """
    Build a long-format dataframe with columns [T, Z, V, y] from an original wide dataframe.
    
    Parameters:
        original_df (pd.DataFrame): DataFrame with columns [RecordID, Time, f1, f2, ..., f41].
        label_df (pd.DataFrame): DataFrame with columns [RecordID, y] containing the label for each RecordID.
        base_time (str): Base time used for normalizing the Time column.
        duration_hours (int): The duration (in hours) from base_time over which Time is normalized (here, 48 hours).
    
    Returns:
        long_df (pd.DataFrame): Long-format dataframe with columns:
                                T: normalized time [0, 1],
                                Z: index of the feature,
                                V: scaled measurement value,
                                y: label corresponding to RecordID.
        feature_to_index (dict): Mapping from original feature names to integer indices.
    """
    # Merge the labels with the original dataframe using RecordID.
    df = original_df.copy().merge(label_df, on="RecordID", how="left")
    
    # Convert Time to datetime and compute normalized time T.
    df["Time"] = pd.to_datetime(df["Time"])
    start_time = pd.to_datetime(base_time)
    end_time = start_time + pd.Timedelta(hours=duration_hours)
    total_seconds = (end_time - start_time).total_seconds()
    df["T"] = (df["Time"] - start_time).dt.total_seconds() / total_seconds
    
    # Identify feature columns: all columns except RecordID, Time, T, and y.
    feature_cols = [col for col in df.columns if col not in ["RecordID", "Time", "T", "In-hospital_death"]]
    
    # Scale each feature individually using MinMaxScaler.
    scaler = MinMaxScaler()
    df[feature_cols] = scaler.fit_transform(df[feature_cols])
    
    # Melt the dataframe from wide to long format.
    # The id_vars ("T" and "y") are preserved for each measurement.
    long_df = pd.melt(df, id_vars=["T", "In-hospital_death"], value_vars=feature_cols, 
                      var_name="Z", value_name="V")
    
    # Map feature names to indices for the "Z" column.
    feature_to_index = {feat: idx for idx, feat in enumerate(feature_cols)}
    long_df["Z"] = long_df["Z"].map(feature_to_index)
    
    # Sort the final dataframe by normalized time T and reset the index.
    long_df = long_df.sort_values("T").reset_index(drop=True)
    long_df = long_df.dropna(subset=["V"])
    
    return long_df, feature_to_index

In [57]:
# Build the TZV dataframes
TZV_a, feature_to_index_a = build_TZV_dataframe(set_a_initial, death_a)
TZV_b, feature_to_index_b = build_TZV_dataframe(set_b_initial, death_b)
TZV_c, feature_to_index_c = build_TZV_dataframe(set_c_initial, death_c)

print(TZV_a.shape)
TZV_a.head(10)

(1456736, 4)


Unnamed: 0,T,In-hospital_death,Z,V
0,0.0,0,0,0.52
417,0.0,0,34,0.16791
551,0.0,0,34,0.186567
565,0.0,1,8,0.528571
566,0.0,0,8,0.347619
1229,0.0,0,8,0.390476
1307,0.0,0,8,0.433333
1606,0.0,0,34,0.160448
1607,0.0,1,8,0.542857
1618,0.0,0,34,0.175373


In [46]:
# Check for the total number of not NaN values under some specified columns
selected_cols = [col for col in set_a_initial.columns if col not in ["RecordID", "Time"]]
set_a_initial[selected_cols].notna().sum().sum()

1456736

Checked that the number of not NaN values is the same as the rows of the new dataframe! Let's go
(We have to believe in this format)

## Train the TZV Format with a Transformer

In [58]:
# Remove the In-hospital_death column from the TZV dataframes, but save it
y_a = TZV_a.pop("In-hospital_death")
y_b = TZV_b.pop("In-hospital_death")
y_c = TZV_c.pop("In-hospital_death")

# Convert the TZV dataframes to PyTorch tensors
X_a = torch.tensor(TZV_a[["T", "Z", "V"]].values, dtype=torch.float32)
X_b = torch.tensor(TZV_b[["T", "Z", "V"]].values, dtype=torch.float32)
X_c = torch.tensor(TZV_c[["T", "Z", "V"]].values, dtype=torch.float32)
print(X_a.shape, X_b.shape, X_c.shape)

# Create the datasets and dataloaders
from torch.utils.data import TensorDataset

dataset_a = TensorDataset(X_a, torch.tensor(y_a.values, dtype=torch.float32))
dataset_b = TensorDataset(X_b, torch.tensor(y_b.values, dtype=torch.float32))
dataset_c = TensorDataset(X_c, torch.tensor(y_c.values, dtype=torch.float32))

loader_a = DataLoader(dataset_a, batch_size=64, shuffle=True)
loader_b = DataLoader(dataset_b, batch_size=64, shuffle=False)
loader_c = DataLoader(dataset_c, batch_size=64, shuffle=False)

torch.Size([1456736, 3]) torch.Size([1459862, 3]) torch.Size([1454964, 3])


In [64]:
model_tvz = TransformerClassifier(input_size=3).to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model_tvz.parameters(), lr=0.001)

model_tvz = train_model_with_validation(model_tvz, loader_a, loader_b, criterion, optimizer, device)

                                                                                     

Epoch 1/10
  Train Loss: 0.4153 | AUCROC: 0.5007 | AUPRC: 0.1453
  Val   Loss: 0.4280 | AUCROC: 0.5100 | AUPRC: 0.1576



                                                                                     

Epoch 2/10
  Train Loss: 0.4149 | AUCROC: 0.5003 | AUPRC: 0.1452
  Val   Loss: 0.4273 | AUCROC: 0.4863 | AUPRC: 0.1459



                                                                                     

Epoch 3/10
  Train Loss: 0.4148 | AUCROC: 0.4994 | AUPRC: 0.1452
  Val   Loss: 0.4324 | AUCROC: 0.4846 | AUPRC: 0.1456



                                                                                     

KeyboardInterrupt: 

Training takes a lot of time, it's 1 million of rows per table...

I could make a script.py and run it on Euler...