In [1]:
# Enable IPython's autoreload extension in mode 2, so that imported modules are
# reloaded before each cell is executed.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import pandas as pd
from collections import OrderedDict
import torch
torch.manual_seed(0)
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from tqdm import tqdm
from abcd.local.paths import output_path
from abcd.data.read_data import get_subjects_events_sf, subject_cols_to_events
import abcd.data.VARS as VARS
from abcd.data.define_splits import SITES, save_restore_sex_fmri_splits
from abcd.data.divide_with_splits import divide_events_by_splits
from abcd.data.var_tailoring.normalization import normalize_var
from abcd.data.pytorch.get_dataset import PandasDataset
from abcd.training.ClassifierTrainer import ClassifierTrainer
from abcd.local.paths import core_path
import abcd.data.VARS as VARS
from abcd.exp.Experiment import Experiment
import importlib

In [4]:
# Experiment configuration: target column, feature groups, model
# (module path followed by class name) and optimisation hyperparameters.
config = {
    'target_col': 'kbi_sex_assigned_at_birth',
    'features': ['fmri', 'smri'],
    'model': ['abcd.models.classification.FullyConnected', 'FullyConnected3'],
    'lr': 1e-3,
    'batch_size': 64,
    'nr_epochs': 150,
}

# Create the Experiment object for this run from its name and configuration.
exp = Experiment(name='sex_fs_FC3', config=config)

In [5]:
# Fetch subjects and events
subjects_df, events_df = get_subjects_events_sf()
print("There are {} subjects and {} visits with imaging".format(len(subjects_df), len(events_df)))
# Leave only the baseline visits. An explicit copy is taken so that events_df is
# an independent frame: assigning into a boolean-mask selection of the full
# visits table is what triggers pandas' SettingWithCopyWarning.
is_baseline = events_df['eventname'] == 'baseline_year_1_arm_1'
events_df = events_df.loc[is_baseline].copy()
print("Leaving baseline visits, we have {} visits".format(len(events_df)))

There are 9632 subjects and 18808 visits with imaging
Leaving baseline visits, we have 9085 visits


In [6]:
# Add the target to the events df, if not there
target_col = config['target_col']
if target_col not in events_df.columns:
    events_df = subject_cols_to_events(subjects_df, events_df, columns=[target_col])

# Change ABCD values to class integers starting from 0.
# Every value is remapped in a single pass through a lookup table. Rewriting the
# column in place, one raw value after another, merges classes whenever a freshly
# assigned index equals a raw value that has not been processed yet
# (e.g. raw values -1 and 0 would both end up as class 1).
raw_values = sorted(set(events_df[target_col]))
class_index = {raw_value: ix for ix, raw_value in enumerate(raw_values)}
# assign() builds a new frame instead of writing into the existing one
events_df = events_df.assign(**{target_col: events_df[target_col].map(class_index)})

# Class names ordered by class index: taken from VARS.VALUES when the target is
# listed there, otherwise the raw value converted to a string
if target_col in VARS.VALUES:
    labels = [VARS.VALUES[target_col][raw_value] for raw_value in raw_values]
else:
    labels = [str(raw_value) for raw_value in raw_values]

# Print label distribution
for class_ix, nr_visits in events_df[target_col].value_counts().sort_index().items():
    print('{} visits with {} target'.format(nr_visits, labels[int(class_ix)]))


4703 visits with Male target
4382 visits with Female target


In [7]:
# Define features
# fMRI group: the keys of VARS.NAMED_CONNECTIONS
features_fmri = list(VARS.NAMED_CONNECTIONS.keys())

# sMRI group: for every key of VARS.DESIKAN_STRUCT_FEATURES, one column named
# '<key>_<suffix>' per entry of its parcel list followed by VARS.DESIKAN_MEANS
features_smri = []
for measure in VARS.DESIKAN_STRUCT_FEATURES.keys():
    suffixes = VARS.DESIKAN_PARCELS[measure] + VARS.DESIKAN_MEANS
    features_smri.extend(measure + '_' + suffix for suffix in suffixes)

# Keep the groups requested in the config, always fMRI before sMRI
available_features = (('fmri', features_fmri), ('smri', features_smri))
feature_cols = [col
                for group, group_cols in available_features
                if group in config['features']
                for col in group_cols]

In [8]:
# Normalize features
# normalize_var is called once per feature column, with the column name passed
# for both name arguments; its return value replaces events_df on every step.
# NOTE(review): this runs on all visits before they are divided into
# train/val/test. If normalize_var derives its statistics from the frame it
# receives, held-out visits influence the training features - TODO confirm.
for feature_col in feature_cols:
    events_df = normalize_var(events_df, feature_col, feature_col)

In [9]:
# Divide events into training, validation and testing
# The first entry of SITES is passed to divide_events_by_splits as the
# out-of-distribution site id.
ood_site_id = SITES[0]
splits = save_restore_sex_fmri_splits(k=5)
events_train, events_id_test, events_ood_test = divide_events_by_splits(
    events_df, splits, ood_site_id)

# Report how many events ended up in each part
split_sizes = [len(part) for part in (events_train, events_id_test, events_ood_test)]
print("Nr. events train: {}, val: {}, test: {}".format(*split_sizes))

Nr. events train: 7047, val: 1754, test: 284


In [10]:
# Define PyTorch datasets and dataloaders
# One PandasDataset per split, kept in insertion order: Train, Val, Test.
# Every dataset uses the same feature columns and target column.
events_per_split = (('Train', events_train),
                    ('Val', events_id_test),
                    ('Test', events_ood_test))
datasets = OrderedDict(
    (split_name, PandasDataset(split_events, feature_cols, target_col))
    for split_name, split_events in events_per_split)

In [11]:
# Create dataloaders
batch_size = config['batch_size']
# Only the 'Train' loader shuffles its samples. The 'Val' and 'Test' loaders are
# used for evaluation only, where a fixed sample order keeps runs comparable and
# avoids drawing from the random number generator for no benefit.
dataloaders = OrderedDict(
    (dataset_name, DataLoader(dataset, batch_size=batch_size, shuffle=(dataset_name == 'Train')))
    for dataset_name, dataset in datasets.items())

# Look at the first training batch only, to check tensor shapes and the target dtype
for X, y in dataloaders['Train']:
    print(f"Shape of X: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X: torch.Size([64, 737])
Shape of y: torch.Size([64]) torch.int64


In [12]:
# Determine device for training
# Preference order: CUDA first, then MPS, and CPU when neither is available
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print("Using {} device".format(device))

Using cpu device


In [13]:
# Define model
# The model class is resolved at runtime from the config entry 'model', which
# holds the module path followed by the class name.
models_path = os.path.join(exp.path, 'models')
model_module_name = config['model'][0]
model_class_name = config['model'][1]
module = importlib.import_module(model_module_name)
model_class = getattr(module, model_class_name)

# Instantiate with one input per feature column and move the model to the device
model = model_class(save_path=models_path, labels=labels, input_size=len(feature_cols)).to(device)
print(model)

FullyConnected3(
  (softmax): Softmax(dim=1)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_layers): Sequential(
    (0): Linear(in_features=737, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=2, bias=True)
  )
)


In [14]:
# Define optimizer and trainer
# Trainer files go into the 'trainer' subdirectory of the experiment path
trainer_path = os.path.join(exp.path, 'trainer')

# Adam over all model parameters with the configured learning rate,
# paired with a cross-entropy loss
learning_rate = config['lr']
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_f = nn.CrossEntropyLoss()

trainer = ClassifierTrainer(trainer_path, device, optimizer, loss_f, labels=labels)

In [15]:
# Train model
nr_epochs = config['nr_epochs']
# Reporting intervals are derived from the run length: loss printing and
# evaluation ten times per run, state export five times per run.
# The lower bound of 1 keeps the intervals valid for short runs; without it,
# fewer than 10 (or 5) epochs would produce an interval of 0.
log_interval = max(1, nr_epochs // 10)
export_interval = max(1, nr_epochs // 5)
# The training loader is passed on its own and again as part of `dataloaders`.
trainer.train(model, dataloaders['Train'], dataloaders,
              nr_epochs=nr_epochs, starting_from_epoch=0,
              print_loss_every=log_interval, eval_every=log_interval,
              export_every=export_interval, verbose=True)

  0%|          | 0/150 [00:00<?, ?it/s]

Epoch 0
Train CrossEntropyLoss: 0.701 B-Acc.: 0.500 F1: 0.325
Val CrossEntropyLoss: 0.702 B-Acc.: 0.500 F1: 0.325
Test CrossEntropyLoss: 0.699 B-Acc.: 0.500 F1: 0.330
Saved PyTorch model state FullyConnected3_epoch0.pth in C:\\Users\camgonza\Box\Camila Gonzalez's Files\Data\ABCD\output\exp\sex_fs_FC3\models
Saved trainer state ClassifierTrainer_optimizer_epoch0.pth in C:\\Users\camgonza\Box\Camila Gonzalez's Files\Data\ABCD\output\exp\sex_fs_FC3\trainer\states
Progress stored in C:\\Users\camgonza\Box\Camila Gonzalez's Files\Data\ABCD\output\exp\sex_fs_FC3\trainer


  1%|          | 1/150 [00:01<04:35,  1.85s/it]

Ending epoch 1, loss 0.6291009362216469


 10%|█         | 15/150 [00:25<04:39,  2.07s/it]

Epoch 15
Train CrossEntropyLoss: 0.363 B-Acc.: 0.835 F1: 0.836
Val CrossEntropyLoss: 0.396 B-Acc.: 0.821 F1: 0.822
Test CrossEntropyLoss: 0.368 B-Acc.: 0.813 F1: 0.813


 11%|█         | 16/150 [00:28<05:30,  2.46s/it]

Ending epoch 16, loss 0.37572511829234456


 20%|██        | 30/150 [00:56<03:54,  1.96s/it]

Epoch 30
Train CrossEntropyLoss: 0.341 B-Acc.: 0.848 F1: 0.849
Val CrossEntropyLoss: 0.381 B-Acc.: 0.828 F1: 0.829
Test CrossEntropyLoss: 0.351 B-Acc.: 0.837 F1: 0.837
Saved PyTorch model state FullyConnected3_epoch30.pth in C:\\Users\camgonza\Box\Camila Gonzalez's Files\Data\ABCD\output\exp\sex_fs_FC3\models
Saved trainer state ClassifierTrainer_optimizer_epoch30.pth in C:\\Users\camgonza\Box\Camila Gonzalez's Files\Data\ABCD\output\exp\sex_fs_FC3\trainer\states
Progress stored in C:\\Users\camgonza\Box\Camila Gonzalez's Files\Data\ABCD\output\exp\sex_fs_FC3\trainer


 21%|██        | 31/150 [01:00<04:55,  2.48s/it]

Ending epoch 31, loss 0.326115421346716


 30%|███       | 45/150 [01:29<03:41,  2.11s/it]

Epoch 45
Train CrossEntropyLoss: 0.425 B-Acc.: 0.799 F1: 0.796
Val CrossEntropyLoss: 0.469 B-Acc.: 0.782 F1: 0.779
Test CrossEntropyLoss: 0.437 B-Acc.: 0.804 F1: 0.800


 31%|███       | 46/150 [01:32<04:10,  2.40s/it]

Ending epoch 46, loss 0.3337305576817409


 40%|████      | 60/150 [02:02<03:14,  2.17s/it]

Epoch 60
Train CrossEntropyLoss: 0.338 B-Acc.: 0.846 F1: 0.847
Val CrossEntropyLoss: 0.381 B-Acc.: 0.825 F1: 0.826
Test CrossEntropyLoss: 0.352 B-Acc.: 0.840 F1: 0.840
Saved PyTorch model state FullyConnected3_epoch60.pth in C:\\Users\camgonza\Box\Camila Gonzalez's Files\Data\ABCD\output\exp\sex_fs_FC3\models
Saved trainer state ClassifierTrainer_optimizer_epoch60.pth in C:\\Users\camgonza\Box\Camila Gonzalez's Files\Data\ABCD\output\exp\sex_fs_FC3\trainer\states
Progress stored in C:\\Users\camgonza\Box\Camila Gonzalez's Files\Data\ABCD\output\exp\sex_fs_FC3\trainer


 41%|████      | 61/150 [02:06<04:04,  2.75s/it]

Ending epoch 61, loss 0.36267208274420315


 50%|█████     | 75/150 [02:37<02:44,  2.19s/it]

Epoch 75
Train CrossEntropyLoss: 0.328 B-Acc.: 0.856 F1: 0.857
Val CrossEntropyLoss: 0.374 B-Acc.: 0.833 F1: 0.834
Test CrossEntropyLoss: 0.381 B-Acc.: 0.837 F1: 0.837


 51%|█████     | 76/150 [02:40<03:07,  2.54s/it]

Ending epoch 76, loss 0.3069926300295838


 60%|██████    | 90/150 [03:11<02:09,  2.15s/it]

Epoch 90
Train CrossEntropyLoss: 0.421 B-Acc.: 0.811 F1: 0.801
Val CrossEntropyLoss: 0.482 B-Acc.: 0.799 F1: 0.789
Test CrossEntropyLoss: 0.500 B-Acc.: 0.795 F1: 0.786
Saved PyTorch model state FullyConnected3_epoch90.pth in C:\\Users\camgonza\Box\Camila Gonzalez's Files\Data\ABCD\output\exp\sex_fs_FC3\models
Saved trainer state ClassifierTrainer_optimizer_epoch90.pth in C:\\Users\camgonza\Box\Camila Gonzalez's Files\Data\ABCD\output\exp\sex_fs_FC3\trainer\states
Progress stored in C:\\Users\camgonza\Box\Camila Gonzalez's Files\Data\ABCD\output\exp\sex_fs_FC3\trainer


 61%|██████    | 91/150 [03:15<02:39,  2.71s/it]

Ending epoch 91, loss 0.31932664159181956


 70%|███████   | 105/150 [03:45<01:36,  2.15s/it]

Epoch 105
Train CrossEntropyLoss: 0.285 B-Acc.: 0.878 F1: 0.876
Val CrossEntropyLoss: 0.338 B-Acc.: 0.851 F1: 0.849
Test CrossEntropyLoss: 0.347 B-Acc.: 0.829 F1: 0.826


 71%|███████   | 106/150 [03:48<01:48,  2.46s/it]

Ending epoch 106, loss 0.28300395017271646


 80%|████████  | 120/150 [04:18<01:02,  2.10s/it]

Epoch 120
Train CrossEntropyLoss: 0.487 B-Acc.: 0.787 F1: 0.784
Val CrossEntropyLoss: 0.562 B-Acc.: 0.774 F1: 0.769
Test CrossEntropyLoss: 0.449 B-Acc.: 0.804 F1: 0.801
Saved PyTorch model state FullyConnected3_epoch120.pth in C:\\Users\camgonza\Box\Camila Gonzalez's Files\Data\ABCD\output\exp\sex_fs_FC3\models
Saved trainer state ClassifierTrainer_optimizer_epoch120.pth in C:\\Users\camgonza\Box\Camila Gonzalez's Files\Data\ABCD\output\exp\sex_fs_FC3\trainer\states
Progress stored in C:\\Users\camgonza\Box\Camila Gonzalez's Files\Data\ABCD\output\exp\sex_fs_FC3\trainer


 81%|████████  | 121/150 [04:22<01:21,  2.82s/it]

Ending epoch 121, loss 0.29539132319592143


 90%|█████████ | 135/150 [04:53<00:32,  2.17s/it]

Epoch 135
Train CrossEntropyLoss: 0.289 B-Acc.: 0.871 F1: 0.872
Val CrossEntropyLoss: 0.348 B-Acc.: 0.848 F1: 0.849
Test CrossEntropyLoss: 0.347 B-Acc.: 0.844 F1: 0.844


 91%|█████████ | 136/150 [04:57<00:35,  2.55s/it]

Ending epoch 136, loss 0.27310926567863775


100%|██████████| 150/150 [05:36<00:00,  2.24s/it]


Finished training
Epoch 150
Train CrossEntropyLoss: 0.257 B-Acc.: 0.889 F1: 0.889
Val CrossEntropyLoss: 0.333 B-Acc.: 0.856 F1: 0.857
Test CrossEntropyLoss: 0.309 B-Acc.: 0.831 F1: 0.831
Saved PyTorch model state FullyConnected3_epoch150.pth in C:\\Users\camgonza\Box\Camila Gonzalez's Files\Data\ABCD\output\exp\sex_fs_FC3\models
Saved trainer state ClassifierTrainer_optimizer_epoch150.pth in C:\\Users\camgonza\Box\Camila Gonzalez's Files\Data\ABCD\output\exp\sex_fs_FC3\trainer\states
Progress stored in C:\\Users\camgonza\Box\Camila Gonzalez's Files\Data\ABCD\output\exp\sex_fs_FC3\trainer


In [17]:
# Finish the experiment, recording the device used for this run in its notes
finish_notes = 'device: {}'.format(device)
exp.finish(notes=finish_notes)