Towards 3D Deep Learning for neuropsychiatry: predicting Autism diagnosis using an interpretable Deep Learning pipeline applied to minimally processed structural MRI data, Melanie Garcia, Clare Kelly. medRxiv 2022.10.18.22281196; doi: https://doi.org/10.1101/2022.10.18.22281196

Github: https://github.com/garciaml/Autism-3D-CNN-brain-sMRI?tab=readme-ov-file

In [None]:
# Activate Virtual Environment and Install Requirements
#!python3 -m venv ../pretrainedresnet2
#!source ../pretrainedresnet2/bin/activate
#!python3 -m ipykernel install --user --name=pretrainedresnet2 --display-name "Python (pretrainedresnet2)"
#Switch to notebook/virtual environment kernel
#!pip install -r requirements.txt # install the packages listed in requirements.txt into the new environment
#!pip install "torchio>=0.19.0"
#!pip install monai
#!pip install tensorboard
#!pip install torchsummary

In [2]:
import os
import shutil
import pandas as pd

# Test Paper Reproducibility

In [3]:
def copy_with_structure(source_folder, destination_folder):
    """Copy the contents of source_folder into destination_folder, keeping the directory structure.

    Files directly inside ``source_folder`` are copied with their metadata and
    overwrite same-named files in the destination. Sub-directories are copied
    recursively and merged into any directory that already exists there.

    Args:
        source_folder: Path of the directory whose contents are copied.
        destination_folder: Path of the directory receiving the copies. It is
            created (including missing parents) when it does not exist yet.

    Raises:
        OSError: Propagated from ``os.listdir`` / ``shutil`` when the source
            cannot be listed or an entry cannot be copied.
    """
    # Without this, copying a plain file into a not-yet-existing destination
    # fails, because shutil.copy2 does not create parent directories.
    os.makedirs(destination_folder, exist_ok=True)

    for item in os.listdir(source_folder):
        source_path = os.path.join(source_folder, item)
        destination_path = os.path.join(destination_folder, item)

        if os.path.isfile(source_path):
            shutil.copy2(source_path, destination_path)  # copy file with metadata
        elif os.path.isdir(source_path):
            # dirs_exist_ok=True merges into an existing directory instead of raising
            shutil.copytree(source_path, destination_path, dirs_exist_ok=True)
        else:
            # e.g. broken symlinks or special files
            print(f"Skipping {source_path}, not a file or directory.")

In [4]:
def count_folders(path):
    """Return how many immediate entries of ``path`` are directories."""
    return sum(
        1
        for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )

In [9]:
def count_files(directory_path):
    """Counts the number of files directly inside a directory.

    Only immediate entries are considered; files in sub-directories are not
    counted.

    Args:
        directory_path: The path to the directory.

    Returns:
        The number of files in the directory, or -1 if the path does not exist
        or is not a directory.
    """
    # isdir (rather than exists) also covers a path that points at a regular
    # file, which would otherwise make os.listdir raise NotADirectoryError.
    if not os.path.isdir(directory_path):
        return -1

    return sum(
        1
        for item in os.listdir(directory_path)
        if os.path.isfile(os.path.join(directory_path, item))
    )

In [11]:
# Model Training - check whether the paper's results can be reproduced (train to 42 epochs with a similar split)

# Merge the ABIDE1 and ABIDE2 images into the ABIDE_COMBINED folder under JustBrain_Data
source_folder1 = "JustBrain_Data/ABIDE1"
source_folder2 = "JustBrain_Data/ABIDE2"
destination_folder = "JustBrain_Data/ABIDE_COMBINED"

for _source in (source_folder1, source_folder2):
    copy_with_structure(_source, destination_folder)


# Stack the ABIDE1 and ABIDE2 participants tables and write the combined table
# (this replaces the participants.tsv that the copy above placed in the destination)
participants1_tsv, participants2_tsv = (
    pd.read_csv(tsv_path, sep="\t", dtype=str)
    for tsv_path in (
        'JustBrain_Data/ABIDE1/participants.tsv',
        'JustBrain_Data/ABIDE2/participants.tsv',
    )
)
participants_ABIDE = pd.concat([participants1_tsv, participants2_tsv])
participants_ABIDE.to_csv(
    'JustBrain_Data/ABIDE_COMBINED/participants.tsv',
    sep='\t',
    index=False,
    header=True,
)

# Check: folder count summed over both sources, folder count of the destination,
# and number of rows in the combined participants table
print(sum(count_folders(folder) for folder in (source_folder1, source_folder2)))
print(count_folders(destination_folder))
print(len(participants_ABIDE))  # - looks good!


# Merge the ABIDE1 and ABIDE2 images into the ABIDE_COMBINED folder under Preprocessed_Data
source_folder1 = "Preprocessed_Data/ABIDE1"
source_folder2 = "Preprocessed_Data/ABIDE2"
destination_folder = "Preprocessed_Data/ABIDE_COMBINED"

for _source in (source_folder1, source_folder2):
    copy_with_structure(_source, destination_folder)

# Check: total number of files across the test / train / val folders
test_folder, train_folder, val_folder = (
    count_files(split_dir)
    for split_dir in (
        "Preprocessed_Data/ABIDE_COMBINED/test",
        "Preprocessed_Data/ABIDE_COMBINED/train",
        "Preprocessed_Data/ABIDE_COMBINED/val",
    )
)
print(sum((test_folder, train_folder, val_folder)))

6
3
2112
2112


In [2]:
# Start training from scratch
# batch size = 4 (6 too large)
# learning rate = 0.0001
# seed = 42

In [None]:
# Launch training through the repository's train_medicalnet.py script.
# NOTE(review): the positional arguments look like (raw data dir, preprocessed data dir,
# output dir, pretrained weights file) - confirm against the script's argument parser.
# NOTE(review): no batch size / learning rate / seed is passed on this command line;
# presumably the values noted in the previous cell are the script's defaults - confirm.
!python ../Autism-3D-CNN-brain-sMRI/train_medicalnet.py 'JustBrain_Data/ABIDE_COMBINED' 'Preprocessed_Data/ABIDE_COMBINED' './outputs/Resnet50/ABIDE_Combined' '../Autism-3D-CNN-brain-sMRI/resnet_training/resnet_50.pth'

  scaler = GradScaler()
  m.weight = nn.init.kaiming_normal(m.weight, mode='fan_out')
Loading pretrained model weights selectively (backbone)
  pretrained_weights = torch.load(pretrain_path, map_location=device)
----------
epoch 1
  with autocast():
1/424, train_loss: 0.5798
2/424, train_loss: 0.5303
3/424, train_loss: 1.1646
4/424, train_loss: 1.7541
5/424, train_loss: 0.9332
6/424, train_loss: 0.6520
7/424, train_loss: 2.0708
8/424, train_loss: 1.2679
9/424, train_loss: 0.7920
10/424, train_loss: 0.6910
11/424, train_loss: 0.8742
12/424, train_loss: 0.8633
13/424, train_loss: 0.6171
14/424, train_loss: 0.7828
15/424, train_loss: 0.5452
16/424, train_loss: 0.6900
17/424, train_loss: 1.2921
18/424, train_loss: 0.8975
19/424, train_loss: 1.0314
20/424, train_loss: 0.7176
21/424, train_loss: 0.8488
22/424, train_loss: 1.1240
23/424, train_loss: 0.2069
24/424, train_loss: 0.8436
25/424, train_loss: 1.1680
26/424, train_loss: 0.9736
27/424, train_loss: 0.7531
28/424, train_loss: 0.7423
29/

In [None]:
# Resume training from saved model
#!python ../Autism-3D-CNN-brain-sMRI/train_medicalnet.py 'JustBrain_Data/ABIDE_COMBINED' 'Preprocessed_Data/ABIDE_COMBINED' './outputs/Resnet50/ABIDE_Combined' 'outputs/Resnet50/ABIDE_Combined/checkpoint_1.pth' --resume

In [None]:
# Reduce training.log to a short summary in filtered.log: keep only the per-epoch
# "average loss" lines and the "current epoch:" lines.
# ("Epoch time duration:" lines are deliberately not kept.)
with open("outputs/Resnet50/ABIDE_Combined/training.log", "r") as infile, open("outputs/Resnet50/ABIDE_Combined/filtered.log", "w") as outfile:
    outfile.writelines(
        line
        for line in infile
        if (line.startswith("epoch ") and "average loss" in line)
        or line.startswith("current epoch:")
    )

# ABIDEII

## Test

In [4]:
# Build the subject list for the test split: rename participant_id to SUB_ID,
# keep the rows whose dataset column is 'test', and save them as a CSV file.
participants_tsv = (
    pd.read_csv('JustBrain_Data/ABIDE_COMBINED/participants.tsv', sep="\t", dtype=str)
    .rename(columns={"participant_id": "SUB_ID"})
    .loc[lambda table: table.dataset == 'test']
)
participants_tsv.to_csv(
    'outputs/Resnet50/ABIDE_Combined/test/subjects.csv',
    index=False,
)

In [5]:
# Predictions on the test split with the epoch-18 checkpoint (best accuracy):
# overfit - predicts 0 for every subject.
# NOTE(review): positional arguments look like (image dir, subject list CSV, checkpoint,
# output dir) - confirm against predict_medicalnet_subids.py.
# NOTE(review): the other test-split prediction cells use this same output directory, so
# results may be overwritten between runs - confirm how the script names its output files.
!python ../Autism-3D-CNN-brain-sMRI/predict_medicalnet_subids.py 'Preprocessed_Data/ABIDE_COMBINED/test' 'outputs/Resnet50/ABIDE_Combined/test/subjects.csv' './outputs/Resnet50/ABIDE_Combined/checkpoint_18.pth' './outputs/Resnet50/ABIDE_Combined/test'

  m.weight = nn.init.kaiming_normal(m.weight, mode='fan_out')
  pretrain = torch.load(pretrain_path)
evaluation metric: 0.48695652173913045


In [6]:
# Predictions on the test split with the epoch-12 checkpoint (second best accuracy):
# predicts 1 for every subject.
# NOTE(review): the other test-split prediction cells use this same output directory, so
# results may be overwritten between runs - confirm how the script names its output files.
!python ../Autism-3D-CNN-brain-sMRI/predict_medicalnet_subids.py 'Preprocessed_Data/ABIDE_COMBINED/test' 'outputs/Resnet50/ABIDE_Combined/test/subjects.csv' './outputs/Resnet50/ABIDE_Combined/checkpoint_12.pth' './outputs/Resnet50/ABIDE_Combined/test'

  m.weight = nn.init.kaiming_normal(m.weight, mode='fan_out')
  pretrain = torch.load(pretrain_path)
evaluation metric: 0.5130434782608696


In [7]:
# Predictions on the test split with the epoch-10 checkpoint (third best accuracy):
# predicts 1 for every subject.
# NOTE(review): the other test-split prediction cells use this same output directory, so
# results may be overwritten between runs - confirm how the script names its output files.
!python ../Autism-3D-CNN-brain-sMRI/predict_medicalnet_subids.py 'Preprocessed_Data/ABIDE_COMBINED/test' 'outputs/Resnet50/ABIDE_Combined/test/subjects.csv' './outputs/Resnet50/ABIDE_Combined/checkpoint_10.pth' './outputs/Resnet50/ABIDE_Combined/test'

  m.weight = nn.init.kaiming_normal(m.weight, mode='fan_out')
  pretrain = torch.load(pretrain_path)
evaluation metric: 0.5130434782608696


In [8]:
# Predictions on the test split with the epoch-6 checkpoint (fourth best accuracy).
# NOTE(review): the other test-split prediction cells use this same output directory, so
# results may be overwritten between runs - confirm how the script names its output files.
!python ../Autism-3D-CNN-brain-sMRI/predict_medicalnet_subids.py 'Preprocessed_Data/ABIDE_COMBINED/test' 'outputs/Resnet50/ABIDE_Combined/test/subjects.csv' './outputs/Resnet50/ABIDE_Combined/checkpoint_6.pth' './outputs/Resnet50/ABIDE_Combined/test'

  m.weight = nn.init.kaiming_normal(m.weight, mode='fan_out')
  pretrain = torch.load(pretrain_path)
evaluation metric: 0.5130434782608696


## Val

In [6]:
# Build the subject list for the validation split: rename participant_id to SUB_ID,
# keep the rows whose dataset column is 'val', and save them as a CSV file.
participants_tsv = (
    pd.read_csv('JustBrain_Data/ABIDE_COMBINED/participants.tsv', sep="\t", dtype=str)
    .rename(columns={"participant_id": "SUB_ID"})
    .loc[lambda table: table.dataset == 'val']
)
participants_tsv.to_csv(
    'outputs/Resnet50/ABIDE_Combined/validation/subjects.csv',
    index=False,
)

In [None]:
# Predictions on the validation split with the epoch-36 checkpoint:
# overfit - predictions are all 0.
# NOTE(review): the other validation-split prediction cell uses this same output directory,
# so results may be overwritten between runs - confirm how the script names its output files.
!python ../Autism-3D-CNN-brain-sMRI/predict_medicalnet_subids.py 'Preprocessed_Data/ABIDE_COMBINED/val' 'outputs/Resnet50/ABIDE_Combined/validation/subjects.csv' './outputs/Resnet50/ABIDE_Combined/checkpoint_36.pth' './outputs/Resnet50/ABIDE_Combined/validation'

In [None]:
# Predictions on the validation split with the epoch-12 checkpoint.
# NOTE(review): the other validation-split prediction cell uses this same output directory,
# so results may be overwritten between runs - confirm how the script names its output files.
!python ../Autism-3D-CNN-brain-sMRI/predict_medicalnet_subids.py 'Preprocessed_Data/ABIDE_COMBINED/val' 'outputs/Resnet50/ABIDE_Combined/validation/subjects.csv' './outputs/Resnet50/ABIDE_Combined/checkpoint_12.pth' './outputs/Resnet50/ABIDE_Combined/validation'

## Train

In [None]:
# Build the subject list for the training split: rename participant_id to SUB_ID,
# keep the rows whose dataset column is 'train', and save them as a CSV file.
participants_tsv = (
    pd.read_csv('JustBrain_Data/ABIDE_COMBINED/participants.tsv', sep="\t", dtype=str)
    .rename(columns={"participant_id": "SUB_ID"})
    .loc[lambda table: table.dataset == 'train']
)
participants_tsv.to_csv(
    'outputs/Resnet50/ABIDE_Combined/train/subjects.csv',
    index=False,
)

In [10]:
# Predictions on the training split with the epoch-36 checkpoint:
# overfit - predictions are all 0.
# NOTE(review): the other training-split prediction cell uses this same output directory,
# so results may be overwritten between runs - confirm how the script names its output files.
!python ../Autism-3D-CNN-brain-sMRI/predict_medicalnet_subids.py 'Preprocessed_Data/ABIDE_COMBINED/train' 'outputs/Resnet50/ABIDE_Combined/train/subjects.csv' './outputs/Resnet50/ABIDE_Combined/checkpoint_36.pth' './outputs/Resnet50/ABIDE_Combined/train'

  m.weight = nn.init.kaiming_normal(m.weight, mode='fan_out')
  pretrain = torch.load(pretrain_path)
evaluation metric: 0.5294464075382803


In [None]:
# Predictions on the training split with the epoch-12 checkpoint.
# NOTE(review): the other training-split prediction cell uses this same output directory,
# so results may be overwritten between runs - confirm how the script names its output files.
!python ../Autism-3D-CNN-brain-sMRI/predict_medicalnet_subids.py 'Preprocessed_Data/ABIDE_COMBINED/train' 'outputs/Resnet50/ABIDE_Combined/train/subjects.csv' './outputs/Resnet50/ABIDE_Combined/checkpoint_12.pth' './outputs/Resnet50/ABIDE_Combined/train'