In [1]:
# Reload imported modules automatically before each cell runs, so edits to
# the project's source files take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np

# Size of the synthetic cohort and the ordered visit labels; the generated
# table below holds one row per (subject, time point) pair.
num_subjects = 100
time_points = ['T1', 'T2', 'T3']

# Function to categorize blood pressure
def categorize_bp(systolic_bp):
    """Map a systolic blood-pressure reading to an integer class label.

    Args:
        systolic_bp: Systolic reading as an int or float.

    Returns:
        0 ('Normal') for readings below 120, 1 ('Elevated') for readings
        from 120 up to but not including 130, and 2 ('Hypertension')
        otherwise. A value that compares false against both thresholds
        (e.g. NaN) also falls through to 2.
    """
    if systolic_bp < 120:
        return 0  # 'Normal'
    # Half-open upper bound: fractional readings such as 129.5 stay
    # 'Elevated' instead of slipping through a gap between 129 and 130.
    if systolic_bp < 130:
        return 1  # 'Elevated'
    return 2  # 'Hypertension'

# Build a synthetic long-format table: one row per (subject, time point).
np.random.seed(42)  # fixed seed so the generated readings are reproducible
n_rows = num_subjects * len(time_points)
subject_labels = [f'subject_{i:03}' for i in range(1, num_subjects + 1)]

# Systolic values are drawn before diastolic ones; keeping that order keeps
# the random stream, and therefore the generated data, unchanged.
systolic_values = np.random.randint(110, 150, n_rows)
diastolic_values = np.random.randint(70, 90, n_rows)

data = {
    'SubjectID': np.repeat(subject_labels, len(time_points)),
    'TimePoint': time_points * num_subjects,
    'SystolicBP': systolic_values,
    'DiastolicBP': diastolic_values,
}

# Assemble the frame and label every row (all time points, not only T3)
# with the class derived from its systolic reading.
df_longitudinal = pd.DataFrame(data)
df_longitudinal['BPCategory'] = df_longitudinal['SystolicBP'].apply(categorize_bp)

# Optional robustness check: uncomment the lines below to shuffle the rows and drop the first 20%, simulating subjects with missing time points
#num_rows_to_remove = int(len(df_longitudinal) * 0.2)
#df_longitudinal = df_longitudinal.sample(frac=1, random_state=42).reset_index(drop=True)
#df_longitudinal = df_longitudinal.iloc[num_rows_to_remove:]

import os

# Split by subject rather than by row, so every time point of a subject lands
# in the same partition. The first 80% of subjects train and the rest test;
# the cut-off scales with the cohort instead of being pinned to 80 subjects,
# which would leave the test set empty for cohorts of 80 or fewer.
all_subjects = list(df_longitudinal['SubjectID'].unique())
num_train_subjects = (len(all_subjects) * 4) // 5
train_subjects = all_subjects[:num_train_subjects]
test_subjects = all_subjects[num_train_subjects:]
df_train = df_longitudinal[df_longitudinal['SubjectID'].isin(train_subjects)]
df_test = df_longitudinal[df_longitudinal['SubjectID'].isin(test_subjects)]

# Persist the full table next to the experiment outputs.
# NOTE(review): this is a machine-specific absolute path; consider moving it
# to a configurable location before sharing the notebook.
dataset_path = "C:\\Users\\camgonza\\Box\\Camila Gonzalez's Files\\Data\\NCANDA\\NCANDA_experiments\\long_testground"
os.makedirs(dataset_path, exist_ok=True)  # to_csv does not create missing directories
df_longitudinal.to_csv(os.path.join(dataset_path, "df_longitudinal.csv"), index=False)


In [3]:
from lln.data.pytorch.get_dataset import LongDataset
from collections import OrderedDict
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from lln.models.longitudinal.LSTM import LSTM
from lln.training.LongitudinalTrainer import LongitudinalTrainer
seq_to_seq = True  # Shared flag passed to the datasets, the model and the trainer below
input_dim = 2  # Number of input features per time point (systolic and diastolic BP)
hidden_dim = 100  # Size of the LSTM hidden state (units per layer, not a layer count)
output_dim = 3  # Number of output classes (Normal, Elevated, Hypertension)
n_layers = 1  # Number of LSTM layers

In [4]:
# Wrap each split in a LongDataset. Both splits use the same column
# configuration and differ only in the frame they read from.
datasets = OrderedDict(
    (
        split_name,
        LongDataset(
            split_frame,
            feature_cols=['SystolicBP', 'DiastolicBP'],
            target_col='BPCategory',
            seq_to_seq=seq_to_seq,
            id_col='SubjectID',
            seq_col='TimePoint',
            timepoints=['T1', 'T2', 'T3'],
        ),
    )
    for split_name, split_frame in (('Train', df_train), ('Test', df_test))
)
# Show which subject IDs ended up in each split.
for split_name in ('Train', 'Test'):
    print(datasets[split_name].subjects)

['subject_001', 'subject_002', 'subject_003', 'subject_004', 'subject_005', 'subject_006', 'subject_007', 'subject_008', 'subject_009', 'subject_010', 'subject_011', 'subject_012', 'subject_013', 'subject_014', 'subject_015', 'subject_016', 'subject_017', 'subject_018', 'subject_019', 'subject_020', 'subject_021', 'subject_022', 'subject_023', 'subject_024', 'subject_025', 'subject_026', 'subject_027', 'subject_028', 'subject_029', 'subject_030', 'subject_031', 'subject_032', 'subject_033', 'subject_034', 'subject_035', 'subject_036', 'subject_037', 'subject_038', 'subject_039', 'subject_040', 'subject_041', 'subject_042', 'subject_043', 'subject_044', 'subject_045', 'subject_046', 'subject_047', 'subject_048', 'subject_049', 'subject_050', 'subject_051', 'subject_052', 'subject_053', 'subject_054', 'subject_055', 'subject_056', 'subject_057', 'subject_058', 'subject_059', 'subject_060', 'subject_061', 'subject_062', 'subject_063', 'subject_064', 'subject_065', 'subject_066', 'subject_

In [5]:
# Build one shuffling DataLoader per split, keyed like `datasets`.
batch_size = 10
dataloaders = OrderedDict()
for split_name, split_dataset in datasets.items():
    dataloaders[split_name] = DataLoader(split_dataset, batch_size=batch_size, shuffle=True)

# Sanity check: report the shapes of the first training batch only.
for X, y in dataloaders['Train']:
    print(f"Shape of X: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X: torch.Size([10, 3, 2])
Shape of y: torch.Size([10, 3]) torch.int64


In [6]:
# Define model
# Directory handed to the model as `save_path`; the training log further down
# reports the LSTM_epoch*.pth model-state files being saved here.
# NOTE(review): machine-specific absolute path — consider making it configurable.
save_path = "C:\\Users\\camgonza\\Box\\Camila Gonzalez's Files\\Data\\NCANDA\\NCANDA_experiments\\long_testground\\models"
# The three positional arguments are the feature count, hidden size and class
# count defined above (order assumed from the variable names — confirm against
# the LSTM class signature).
model = LSTM(input_dim, hidden_dim, output_dim, save_path=save_path, nr_layers=n_layers, seq_to_seq=seq_to_seq)
# Print the module structure as a quick check of the layer sizes.
print(model)

LSTM(
  (lstm): LSTM(2, 100, batch_first=True)
  (fc): Linear(in_features=100, out_features=3, bias=True)
)


In [7]:
# Directory handed to the trainer for its state and progress files.
trainer_path = "C:\\Users\\camgonza\\Box\\Camila Gonzalez's Files\\Data\\NCANDA\\NCANDA_experiments\\long_testground\\trainer"

# Cross-entropy objective over the three BP classes, optimised with Adam.
loss_f = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# The trainer runs on CPU and uses the label names when reporting metrics.
trainer = LongitudinalTrainer(
    trainer_path,
    'cpu',
    optimizer,
    loss_f,
    seq_to_seq=seq_to_seq,
    labels=["Normal", "Elevated", "Hypertension"],
)

In [8]:
# Train model
nr_epochs = 100
# Log and evaluate every tenth of the run; export checkpoints every fifth.
report_interval = nr_epochs // 10
export_interval = nr_epochs // 5
trainer.train(
    model,
    dataloaders['Train'],
    dataloaders,
    nr_epochs=nr_epochs,
    starting_from_epoch=0,
    print_loss_every=report_interval,
    eval_every=report_interval,
    export_every=export_interval,
    verbose=True,
)

  1%|          | 1/100 [00:00<00:59,  1.66it/s]

Epoch 0
Train CrossEntropyLoss: 1.165 B-Acc.: 0.318 F1: 0.135
Test CrossEntropyLoss: 1.215 B-Acc.: 0.333 F1: 0.095
Saved PyTorch model state LSTM_epoch0.pth in C:\Users\camgonza\Box\Camila Gonzalez's Files\Data\NCANDA\NCANDA_experiments\long_testground\models
Saved trainer state LongitudinalTrainer_optimizer_epoch0.pth in C:\Users\camgonza\Box\Camila Gonzalez's Files\Data\NCANDA\NCANDA_experiments\long_testground\trainer\states
Progress stored in C:\Users\camgonza\Box\Camila Gonzalez's Files\Data\NCANDA\NCANDA_experiments\long_testground\trainer
Ending epoch 1, loss 1.1023691147565842


 15%|█▌        | 15/100 [00:01<00:06, 12.63it/s]

Epoch 10
Train CrossEntropyLoss: 0.702 B-Acc.: 0.570 F1: 0.517
Test CrossEntropyLoss: 0.631 B-Acc.: 0.567 F1: 0.553
Ending epoch 11, loss 0.7000902071595192


 21%|██        | 21/100 [00:02<00:09,  8.72it/s]

Epoch 20
Train CrossEntropyLoss: 0.621 B-Acc.: 0.636 F1: 0.611
Test CrossEntropyLoss: 0.568 B-Acc.: 0.638 F1: 0.626
Saved PyTorch model state LSTM_epoch20.pth in C:\Users\camgonza\Box\Camila Gonzalez's Files\Data\NCANDA\NCANDA_experiments\long_testground\models
Saved trainer state LongitudinalTrainer_optimizer_epoch20.pth in C:\Users\camgonza\Box\Camila Gonzalez's Files\Data\NCANDA\NCANDA_experiments\long_testground\trainer\states
Progress stored in C:\Users\camgonza\Box\Camila Gonzalez's Files\Data\NCANDA\NCANDA_experiments\long_testground\trainer
Ending epoch 21, loss 0.6246859803795815


 33%|███▎      | 33/100 [00:03<00:06, 10.45it/s]

Epoch 30
Train CrossEntropyLoss: 0.587 B-Acc.: 0.648 F1: 0.628
Test CrossEntropyLoss: 0.540 B-Acc.: 0.638 F1: 0.636
Ending epoch 31, loss 0.6089584305882454


 43%|████▎     | 43/100 [00:04<00:06,  8.97it/s]

Epoch 40
Train CrossEntropyLoss: 0.561 B-Acc.: 0.663 F1: 0.656
Test CrossEntropyLoss: 0.519 B-Acc.: 0.666 F1: 0.696
Saved PyTorch model state LSTM_epoch40.pth in C:\Users\camgonza\Box\Camila Gonzalez's Files\Data\NCANDA\NCANDA_experiments\long_testground\models
Saved trainer state LongitudinalTrainer_optimizer_epoch40.pth in C:\Users\camgonza\Box\Camila Gonzalez's Files\Data\NCANDA\NCANDA_experiments\long_testground\trainer\states
Progress stored in C:\Users\camgonza\Box\Camila Gonzalez's Files\Data\NCANDA\NCANDA_experiments\long_testground\trainer
Ending epoch 41, loss 0.5725197978317738


 53%|█████▎    | 53/100 [00:04<00:05,  8.90it/s]

Epoch 50
Train CrossEntropyLoss: 0.537 B-Acc.: 0.689 F1: 0.690
Test CrossEntropyLoss: 0.506 B-Acc.: 0.685 F1: 0.706
Ending epoch 51, loss 0.5577188208699226


 59%|█████▉    | 59/100 [00:05<00:03, 12.97it/s]

Epoch 60
Train CrossEntropyLoss: 0.528 B-Acc.: 0.675 F1: 0.684
Test CrossEntropyLoss: 0.499 B-Acc.: 0.694 F1: 0.730
Saved PyTorch model state LSTM_epoch60.pth in C:\Users\camgonza\Box\Camila Gonzalez's Files\Data\NCANDA\NCANDA_experiments\long_testground\models
Saved trainer state LongitudinalTrainer_optimizer_epoch60.pth in C:\Users\camgonza\Box\Camila Gonzalez's Files\Data\NCANDA\NCANDA_experiments\long_testground\trainer\states


 62%|██████▏   | 62/100 [00:06<00:06,  6.13it/s]

Progress stored in C:\Users\camgonza\Box\Camila Gonzalez's Files\Data\NCANDA\NCANDA_experiments\long_testground\trainer
Ending epoch 61, loss 0.5305501036345959


 74%|███████▍  | 74/100 [00:07<00:02,  8.68it/s]

Epoch 70
Train CrossEntropyLoss: 0.508 B-Acc.: 0.647 F1: 0.640
Test CrossEntropyLoss: 0.484 B-Acc.: 0.683 F1: 0.721
Ending epoch 71, loss 0.5071765743196011


 80%|████████  | 80/100 [00:07<00:01, 13.13it/s]

Epoch 80
Train CrossEntropyLoss: 0.484 B-Acc.: 0.746 F1: 0.751
Test CrossEntropyLoss: 0.480 B-Acc.: 0.732 F1: 0.757
Saved PyTorch model state LSTM_epoch80.pth in C:\Users\camgonza\Box\Camila Gonzalez's Files\Data\NCANDA\NCANDA_experiments\long_testground\models
Saved trainer state LongitudinalTrainer_optimizer_epoch80.pth in C:\Users\camgonza\Box\Camila Gonzalez's Files\Data\NCANDA\NCANDA_experiments\long_testground\trainer\states
Progress stored in C:\Users\camgonza\Box\Camila Gonzalez's Files\Data\NCANDA\NCANDA_experiments\long_testground\trainer
Ending epoch 81, loss 0.4998226761817932


 91%|█████████ | 91/100 [00:09<00:01,  6.30it/s]

Epoch 90
Train CrossEntropyLoss: 0.466 B-Acc.: 0.717 F1: 0.715
Test CrossEntropyLoss: 0.458 B-Acc.: 0.657 F1: 0.658
Ending epoch 91, loss 0.4623362310230732


100%|██████████| 100/100 [00:10<00:00,  9.84it/s]


Finished training
Epoch 100
Train CrossEntropyLoss: 0.448 B-Acc.: 0.722 F1: 0.730
Test CrossEntropyLoss: 0.442 B-Acc.: 0.694 F1: 0.726
Saved PyTorch model state LSTM_epoch100.pth in C:\Users\camgonza\Box\Camila Gonzalez's Files\Data\NCANDA\NCANDA_experiments\long_testground\models
Saved trainer state LongitudinalTrainer_optimizer_epoch100.pth in C:\Users\camgonza\Box\Camila Gonzalez's Files\Data\NCANDA\NCANDA_experiments\long_testground\trainer\states
Progress stored in C:\Users\camgonza\Box\Camila Gonzalez's Files\Data\NCANDA\NCANDA_experiments\long_testground\trainer
