In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import pandas as pd
from collections import OrderedDict
import torch
torch.manual_seed(0)
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from tqdm import tqdm
from abcd.local.paths import output_path
from abcd.data.read_data import get_subjects_events_sf, add_event_vars, add_subject_vars, filter_events, filter_subjects
import abcd.data.VARS as VARS
from abcd.data.define_splits import SITES, save_restore_sex_fmri_splits
from abcd.data.divide_with_splits import divide_events_by_splits
from abcd.data.var_tailoring.normalization import normalize_var
from abcd.data.pytorch.get_dataset import PandasDataset
from abcd.local.paths import core_path, output_path
from abcd.data.VARS import STRUCT_FILES, STRUCT_FEATURES

from abcd.models.regression.LinearRegressor import LinearRegressor
from abcd.training.RegressorTrainer import RegressorTrainer



# Preprocess
1. Fetch subject and events dataframes
2. Change sex assigned at birth labels to 0 (male), 1 (female)
3. Filter out all non-baseline events
4. Define target column (sex assigned at birth), and features columns (functional connectivities and structural features)
5. Normalize features
6. train/val/test split

In [3]:
# Fetch subjects and events
subjects_df, events_df = get_subjects_events_sf()
# Change to 0 (male) and 1 (female)
events_df.loc[events_df["kbi_sex_assigned_at_birth"] == 1.0, "kbi_sex_assigned_at_birth"] = float(0)
events_df.loc[events_df["kbi_sex_assigned_at_birth"] == 2.0, "kbi_sex_assigned_at_birth"] = float(1)
# Leave only the baseline visits
events_df = events_df.loc[(events_df['eventname'] == 'baseline_year_1_arm_1')]
print("Leaving only baseline visits, we have {} events".format(len(events_df)))

Leaving only baseline visits, we have 9086 events


In [4]:
# Define target and features
target_col = 'kbi_sex_assigned_at_birth'
labels = ["Male", "Female"]
feature_cols = list(VARS.NAMED_CONNECTIONS.keys()) + list(VARS.STRUCT_FEATURES.keys())

# Print distribution of baseline class
for val in set(events_df['kbi_sex_assigned_at_birth']):
    print('{} visits with {} target'.format(len(events_df.loc[events_df["kbi_sex_assigned_at_birth"] == val]), labels[int(val)]))

4704 visits with Male target
4382 visits with Female target


In [5]:
# Normalize features
for var_id in feature_cols:
    events_df = normalize_var(events_df, var_id, var_id)

In [6]:
# Divide events into training, validation and testing
splits = save_restore_sex_fmri_splits(k=5)
ood_site_id = SITES[0]
events_train, events_id_test, events_ood_test = divide_events_by_splits(events_df, splits, ood_site_id)
print("Nr. events train: {}, val: {}, test: {}".format(len(events_train), len(events_id_test), len(events_ood_test)))

Nr. events train: 7064, val: 1738, test: 284


# Linear Regression

In [7]:
# Define PyTorch datasets and dataloaders
datasets = OrderedDict([('train', PandasDataset(events_train, feature_cols, target_col)),
            ('val', PandasDataset(events_id_test, feature_cols, target_col)),
            ('test', PandasDataset(events_ood_test, feature_cols, target_col))])

In [8]:
# Create dataloaders
batch_size = 64
dataloaders = OrderedDict([(dataset_name, DataLoader(dataset, batch_size=batch_size, shuffle=True))
    for dataset_name, dataset in datasets.items()])

for X, y in dataloaders['train']:
    print(f"Shape of X: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X: torch.Size([64, 177])
Shape of y: torch.Size([64]) torch.int64


In [9]:
# Determine device for training
device = "cpu" #("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
print("Using {} device".format(device))

Using cpu device


In [10]:
# Define model
models_path = os.path.join(output_path, 'ABCD_sex_prediction_lr', 'models')

model = LinearRegressor(save_path=models_path, input_size=len(feature_cols))

model = model.to(device)
print(model)

LinearRegressor(
  (linear): Linear(in_features=177, out_features=1, bias=True)
)


In [11]:
# Define optimizer and trainer
learning_rate = 1e-1
loss_f = nn.MSELoss()

trainer_path = os.path.join(output_path, 'ABCD_sex_prediction_lr', 'results')
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

trainer = RegressorTrainer(trainer_path, device, optimizer, loss_f)

In [15]:
# Train model
trainer.train(model, dataloaders['train'], dataloaders, 
              nr_epochs=100, starting_from_epoch=0,
              print_loss_every=1, eval_every=10, export_every=50, verbose=True)

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  5%|▌         | 5/100 [00:00<00:04, 20.68it/s]

Epoch 0
train MSELoss: 0.250 MSE: 0.249
val MSELoss: 0.250 MSE: 0.248
test MSELoss: 0.251 MSE: 0.250
Saved PyTorch model state LinearRegressor_epoch0.pth in /Users/carolinezanze/Desktop/abcd5_output/ABCD_sex_prediction_lr/models
Saved trainer state RegressorTrainer_optimizer_epoch0.pth in /Users/carolinezanze/Desktop/abcd5_output/ABCD_sex_prediction_lr/results/states
Progress stored in /Users/carolinezanze/Desktop/abcd5_output/ABCD_sex_prediction_lr/results
Ending epoch 1, loss 0.2501215044711087
Ending epoch 2, loss 0.2500013605699883
Ending epoch 3, loss 0.2501028259326746
Ending epoch 4, loss 0.25005126603551814
Ending epoch 5, loss 0.2500514667313378


  8%|▊         | 8/100 [00:00<00:04, 22.95it/s]

Ending epoch 6, loss 0.2501188895186862
Ending epoch 7, loss 0.25014638350353585
Ending epoch 8, loss 0.250043273374841
Ending epoch 9, loss 0.24983439829435433
Ending epoch 10, loss 0.24997380362437652
Epoch 10
train MSELoss: 0.250 MSE: 0.248
val MSELoss: 0.250 MSE: 0.248
test MSELoss: 0.251 MSE: 0.250


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
 14%|█▍        | 14/100 [00:00<00:03, 21.82it/s]

Ending epoch 11, loss 0.25007448795142473
Ending epoch 12, loss 0.250123622315424
Ending epoch 13, loss 0.2501865390453253
Ending epoch 14, loss 0.2501150311918946
Ending epoch 15, loss 0.25012195405659376


 20%|██        | 20/100 [00:00<00:03, 22.98it/s]

Ending epoch 16, loss 0.24997746434297646
Ending epoch 17, loss 0.2500513491329846
Ending epoch 18, loss 0.24998286702074446
Ending epoch 19, loss 0.25001791052453154
Ending epoch 20, loss 0.24994013492051545


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
 23%|██▎       | 23/100 [00:01<00:03, 20.27it/s]

Epoch 20
train MSELoss: 0.250 MSE: 0.248
val MSELoss: 0.250 MSE: 0.249
test MSELoss: 0.250 MSE: 0.249
Ending epoch 21, loss 0.2501317739218205
Ending epoch 22, loss 0.24994032621920645
Ending epoch 23, loss 0.25013991935296104
Ending epoch 24, loss 0.24985743293891083


 29%|██▉       | 29/100 [00:01<00:03, 22.82it/s]

Ending epoch 25, loss 0.2500385633997015
Ending epoch 26, loss 0.25006073458237693
Ending epoch 27, loss 0.24994125411854134
Ending epoch 28, loss 0.24987785464471524
Ending epoch 29, loss 0.2501477882400289
Ending epoch 30, loss 0.2500424299154196


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
 35%|███▌      | 35/100 [00:01<00:02, 22.64it/s]

Epoch 30
train MSELoss: 0.250 MSE: 0.248
val MSELoss: 0.250 MSE: 0.249
test MSELoss: 0.250 MSE: 0.249
Ending epoch 31, loss 0.2500469166416306
Ending epoch 32, loss 0.25001323155991667
Ending epoch 33, loss 0.24993157333081906
Ending epoch 34, loss 0.2499672444285573
Ending epoch 35, loss 0.2501228671890121


 38%|███▊      | 38/100 [00:01<00:02, 23.63it/s]

Ending epoch 36, loss 0.2501449887011502
Ending epoch 37, loss 0.2501290389546403
Ending epoch 38, loss 0.24994212617208292
Ending epoch 39, loss 0.2501225421826045
Ending epoch 40, loss 0.2500255368314348
Epoch 40
train MSELoss: 0.250 MSE: 0.248
val MSELoss: 0.250 MSE: 0.248
test MSELoss: 0.250 MSE: 0.250


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
 44%|████▍     | 44/100 [00:01<00:02, 22.81it/s]

Ending epoch 41, loss 0.2501648612119056
Ending epoch 42, loss 0.24997368709998088
Ending epoch 43, loss 0.24998003244400024
Ending epoch 44, loss 0.2500488085521234
Ending epoch 45, loss 0.24995277728046383
Ending epoch 46, loss 0.2500002635223372


 50%|█████     | 50/100 [00:02<00:02, 24.04it/s]

Ending epoch 47, loss 0.2499637333927928
Ending epoch 48, loss 0.2499457040617058
Ending epoch 49, loss 0.2500243052705988
Ending epoch 50, loss 0.2501085342856141
Epoch 50
train MSELoss: 0.250 MSE: 0.248
val MSELoss: 0.250 MSE: 0.248
test MSELoss: 0.250 MSE: 0.249
Saved PyTorch model state LinearRegressor_epoch50.pth in /Users/carolinezanze/Desktop/abcd5_output/ABCD_sex_prediction_lr/models
Saved trainer state RegressorTrainer_optimizer_epoch50.pth in /Users/carolinezanze/Desktop/abcd5_output/ABCD_sex_prediction_lr/results/states
Progress stored in /Users/carolinezanze/Desktop/abcd5_output/ABCD_sex_prediction_lr/results


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
 53%|█████▎    | 53/100 [00:02<00:02, 21.49it/s]

Ending epoch 51, loss 0.2497928099589305
Ending epoch 52, loss 0.25014408724802034
Ending epoch 53, loss 0.2501022729787741
Ending epoch 54, loss 0.24998828822428043
Ending epoch 55, loss 0.249759442500166


 59%|█████▉    | 59/100 [00:02<00:01, 23.39it/s]

Ending epoch 56, loss 0.250106255481909
Ending epoch 57, loss 0.25001224069981964
Ending epoch 58, loss 0.25002484077268894
Ending epoch 59, loss 0.25012085902261305
Ending epoch 60, loss 0.24984589688949757


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
 65%|██████▌   | 65/100 [00:02<00:01, 22.71it/s]

Epoch 60
train MSELoss: 0.250 MSE: 0.249
val MSELoss: 0.250 MSE: 0.249
test MSELoss: 0.250 MSE: 0.249
Ending epoch 61, loss 0.25001486249872157
Ending epoch 62, loss 0.24997786318396661
Ending epoch 63, loss 0.24996899927521612
Ending epoch 64, loss 0.24994401298127733
Ending epoch 65, loss 0.2500109463124662


 68%|██████▊   | 68/100 [00:03<00:01, 23.57it/s]

Ending epoch 66, loss 0.24997561284013697
Ending epoch 67, loss 0.2501689516477757
Ending epoch 68, loss 0.24985964663393861
Ending epoch 69, loss 0.2500718488349571
Ending epoch 70, loss 0.24998611547388472


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
 74%|███████▍  | 74/100 [00:03<00:01, 22.60it/s]

Epoch 70
train MSELoss: 0.250 MSE: 0.248
val MSELoss: 0.250 MSE: 0.248
test MSELoss: 0.250 MSE: 0.249
Ending epoch 71, loss 0.24989892139628128
Ending epoch 72, loss 0.25002396643698754
Ending epoch 73, loss 0.2501059110100205
Ending epoch 74, loss 0.24988166424068245
Ending epoch 75, loss 0.24991143098822585


 80%|████████  | 80/100 [00:03<00:00, 23.97it/s]

Ending epoch 76, loss 0.25011595170777123
Ending epoch 77, loss 0.24987922259816178
Ending epoch 78, loss 0.2500338530218279
Ending epoch 79, loss 0.25006616236390294
Ending epoch 80, loss 0.24994707805616362


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
 83%|████████▎ | 83/100 [00:03<00:00, 21.84it/s]

Epoch 80
train MSELoss: 0.251 MSE: 0.249
val MSELoss: 0.251 MSE: 0.249
test MSELoss: 0.253 MSE: 0.251
Ending epoch 81, loss 0.24987266541601302
Ending epoch 82, loss 0.2501569303037884
Ending epoch 83, loss 0.24981908580741366
Ending epoch 84, loss 0.2500564756694141
Ending epoch 85, loss 0.2500336556821256


 89%|████████▉ | 89/100 [00:03<00:00, 23.61it/s]

Ending epoch 86, loss 0.24996823246951574
Ending epoch 87, loss 0.25002428728180964
Ending epoch 88, loss 0.24997191587546924
Ending epoch 89, loss 0.2499977950308774
Ending epoch 90, loss 0.24992995944109048


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
 95%|█████████▌| 95/100 [00:04<00:00, 22.53it/s]

Epoch 90
train MSELoss: 0.250 MSE: 0.248
val MSELoss: 0.250 MSE: 0.248
test MSELoss: 0.250 MSE: 0.249
Ending epoch 91, loss 0.24993389442160324
Ending epoch 92, loss 0.25005109473928677
Ending epoch 93, loss 0.24993374017444817
Ending epoch 94, loss 0.25018120456386256
Ending epoch 95, loss 0.2500508069186597


100%|██████████| 100/100 [00:04<00:00, 22.48it/s]

Ending epoch 96, loss 0.25012529151396706
Ending epoch 97, loss 0.2501210786469348
Ending epoch 98, loss 0.24994047657326535
Ending epoch 99, loss 0.2500727961997728
Ending epoch 100, loss 0.24996185343007785
Finished training
Epoch 100
train MSELoss: 0.250 MSE: 0.248
val MSELoss: 0.250 MSE: 0.248
test MSELoss: 0.252 MSE: 0.250
Saved PyTorch model state LinearRegressor_epoch100.pth in /Users/carolinezanze/Desktop/abcd5_output/ABCD_sex_prediction_lr/models
Saved trainer state RegressorTrainer_optimizer_epoch100.pth in /Users/carolinezanze/Desktop/abcd5_output/ABCD_sex_prediction_lr/results/states
Progress stored in /Users/carolinezanze/Desktop/abcd5_output/ABCD_sex_prediction_lr/results



  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
