In [1]:
import os
import torch
from torch.utils.data import Subset
from torch.utils.data import DataLoader

from miniMTL.datasets import caseControlDataset
from miniMTL.models import *
from miniMTL.training import Trainer

from miniMTL.hps import HPSModel

  from .autonotebook import tqdm as notebook_tqdm


## Load data

In [2]:
# --- Input locations -------------------------------------------------------
# Phenotype CSV, handed to caseControlDataset as its second positional argument.
p_pheno = '/Users/harveyaa/Documents/masters/data/pheno_26-01-22.csv'

# Directory passed to caseControlDataset as `id_path`.
p_ids = '/Users/harveyaa/Documents/masters/neuropsych_mtl/datasets/cv_folds/intrasite/'

# Directory passed to caseControlDataset as `conn_path`.
p_conn = os.path.join(
    '/Users/harveyaa/Documents/masters/data/',
    'connectomes',
)

# --- Output location -------------------------------------------------------
# Passed to Trainer as `log_dir`; left as an empty string here.
p_out = ''

In [3]:
# Build one caseControlDataset per condition, in the order listed in `cases`.
print('Creating datasets...')
cases = [
    'ASD', 'BIP', 'SZ',
    'DEL22q11_2', 'DUP22q11_2',
    'DEL1q21_1', 'DUP1q21_1',
    'DEL16p11_2', 'DUP16p11_2',
]
# Smaller subsets previously swapped in here while experimenting:
#   ['ASD','BIP','SZ'], ['DEL22q11_2'], ['DEL1q21_1','DUP1q21_1']

# Keyword arguments shared by every dataset; only the case name varies.
dataset_kwargs = dict(
    id_path=p_ids,
    conn_path=p_conn,
    type='conn',
    strategy='balanced',
    format=0,
)

data = []
for case in cases:
    # Echo the case name before loading so progress is visible per dataset.
    print(case)
    data.append(caseControlDataset(case, p_pheno, **dataset_kwargs))
print('Done!\n')

Creating datasets...
ASD


  pheno = pd.read_csv(pheno_path,index_col=0)


BIP


  pheno = pd.read_csv(pheno_path,index_col=0)


SZ


  pheno = pd.read_csv(pheno_path,index_col=0)


DEL22q11_2


  pheno = pd.read_csv(pheno_path,index_col=0)


DUP22q11_2


  pheno = pd.read_csv(pheno_path,index_col=0)


DEL1q21_1


  pheno = pd.read_csv(pheno_path,index_col=0)


DUP1q21_1


  pheno = pd.read_csv(pheno_path,index_col=0)


DEL16p11_2


  pheno = pd.read_csv(pheno_path,index_col=0)


DUP16p11_2
Done!



  pheno = pd.read_csv(pheno_path,index_col=0)


In [4]:
# Split data & create loaders & loss fns
# For every case: obtain train/test indices from the dataset, wrap each part in
# a DataLoader, and register a loss function and a decoder head under the case
# name. The per-case dicts are then handed to HPSModel with one shared encoder.
bs = 16  # mini-batch size used by every loader

loss_fns = {}
trainloaders = {}
testloaders = {}
decoders = {}
for d, case in zip(data,cases):
    # NOTE(review): random=False with fold=4 presumably selects a fixed,
    # precomputed CV fold -- confirm against caseControlDataset.split_data.
    train_idx, test_idx = d.split_data(random=False,fold=4)
    train_d = Subset(d,train_idx)
    test_d = Subset(d,test_idx)
    trainloaders[case] = DataLoader(train_d, batch_size=bs, shuffle=True)
    # Evaluation gains nothing from shuffling; a fixed order keeps the test
    # batches identical from one scoring call to the next.
    testloaders[case] = DataLoader(test_d, batch_size=bs, shuffle=False)
    loss_fns[case] = nn.CrossEntropyLoss()
    # Call the constructor directly rather than eval()-ing a formatted string.
    decoders[case] = head3().double()

# One encoder instance shared by all task-specific decoders.
hps = HPSModel(encoder3().double(),
                decoders,
                loss_fns)

Initialized HPSModel using: cpu.



In [5]:
# Optimizer and trainer setup.
# Adam is given every parameter exposed by hps.parameters(), with lr 1e-3.
optim_hps = torch.optim.Adam(params=hps.parameters(), lr=0.001)

# The trainer wraps the optimizer; p_out is supplied as its log_dir.
trainer_hps = Trainer(optim_hps, log_dir=p_out)

In [6]:
# Run training: 50 epochs, passing the per-case train and test loader dicts.
trainer_hps.fit(
    hps,
    trainloaders,
    testloaders,
    num_epochs=50,
)

Epoch 0: 100%|██████████| 109/109 [00:08<00:00, 12.21it/s]
Epoch 1: 100%|██████████| 109/109 [00:09<00:00, 11.98it/s]
Epoch 2: 100%|██████████| 109/109 [00:08<00:00, 12.18it/s]
Epoch 3: 100%|██████████| 109/109 [00:09<00:00, 12.06it/s]
Epoch 4: 100%|██████████| 109/109 [00:08<00:00, 12.47it/s]
Epoch 5: 100%|██████████| 109/109 [00:08<00:00, 12.65it/s]
Epoch 6: 100%|██████████| 109/109 [00:08<00:00, 12.77it/s]
Epoch 7: 100%|██████████| 109/109 [00:08<00:00, 12.68it/s]
Epoch 8: 100%|██████████| 109/109 [00:08<00:00, 12.57it/s]
Epoch 9: 100%|██████████| 109/109 [00:08<00:00, 12.55it/s]
Epoch 10: 100%|██████████| 109/109 [00:08<00:00, 12.61it/s]
Epoch 11: 100%|██████████| 109/109 [00:08<00:00, 12.62it/s]
Epoch 12: 100%|██████████| 109/109 [00:08<00:00, 12.66it/s]
Epoch 13: 100%|██████████| 109/109 [00:09<00:00, 12.08it/s]
Epoch 14: 100%|██████████| 109/109 [00:09<00:00, 11.88it/s]
Epoch 15: 100%|██████████| 109/109 [00:09<00:00, 12.08it/s]
Epoch 16: 100%|██████████| 109/109 [00:09<00:00, 1

In [7]:
# Final evaluation: score the trained model on the test loaders, then print
# each task's accuracy and loss, with a blank line before every task name.
metrics_hps = hps.score(testloaders)
for task, task_metrics in metrics_hps.items():
    print('', task, sep='\n')
    print('Accuracy: ', task_metrics['accuracy'])
    print('Loss: ', task_metrics['loss'])
print()


ASD
Accuracy:  57.97872340425532
Loss:  0.1369718253028779

BIP
Accuracy:  77.41935483870968
Loss:  0.06530408812576415

SZ
Accuracy:  75.59055118110236
Loss:  0.09313200944603715

DEL22q11_2
Accuracy:  76.47058823529412
Loss:  0.06881384382984729

DUP22q11_2
Accuracy:  50.0
Loss:  0.22229325533492122

DEL1q21_1
Accuracy:  30.0
Loss:  0.4331672511722159

DUP1q21_1
Accuracy:  42.857142857142854
Loss:  0.44524299089875496

DEL16p11_2
Accuracy:  41.66666666666667
Loss:  0.3415105960658535

DUP16p11_2
Accuracy:  50.0
Loss:  0.3076967009626213



In [8]:
# Display the raw dict returned by hps.score(testloaders): one entry per task,
# each holding 'accuracy' and 'loss'.
metrics_hps

{'ASD': {'accuracy': 57.97872340425532, 'loss': 0.1369718253028779},
 'BIP': {'accuracy': 77.41935483870968, 'loss': 0.06530408812576415},
 'SZ': {'accuracy': 75.59055118110236, 'loss': 0.09313200944603715},
 'DEL22q11_2': {'accuracy': 76.47058823529412, 'loss': 0.06881384382984729},
 'DUP22q11_2': {'accuracy': 50.0, 'loss': 0.22229325533492122},
 'DEL1q21_1': {'accuracy': 30.0, 'loss': 0.4331672511722159},
 'DUP1q21_1': {'accuracy': 42.857142857142854, 'loss': 0.44524299089875496},
 'DEL16p11_2': {'accuracy': 41.66666666666667, 'loss': 0.3415105960658535},
 'DUP16p11_2': {'accuracy': 50.0, 'loss': 0.3076967009626213}}

In [17]:
import pandas as pd

# Read the DEL22q11_2 cross-validation fold file, using its first column as the index.
p_22q = '/Users/harveyaa/Documents/masters/neuropsych_mtl/datasets/cv_folds/intrasite/DEL22q11_2.csv'
df = pd.read_csv(filepath_or_buffer=p_22q, index_col=0)

In [18]:
# Column-wise sum of the fold table (one total per fold_* column).
# NOTE(review): presumably each column is a 0/1 membership mask, so the sum
# would be a per-fold subject count -- confirm against the CSV.
df.sum()

fold_0    18.0
fold_1    17.0
fold_2    17.0
fold_3    17.0
fold_4    17.0
dtype: float64

In [19]:
# Re-score the model on the current test loaders.
# NOTE(review): the saved output for this cell lists only 'DEL22q11_2', unlike
# the nine-task result earlier in the notebook, and the execution counts jump
# from 8 to 17 -- `hps`/`testloaders` were likely rebuilt with a different
# `cases` list in between. Confirm with Restart & Run All.
hps.score(testloaders)

{'DEL22q11_2': {'accuracy': 64.70588235294117, 'loss': 0.08235064123016032}}