In [3]:
import os
import tempfile
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary

from miniMTL.datasets import *
from miniMTL.models import *
from miniMTL.util import *
from miniMTL.training import *
from miniMTL.hps import *

In [4]:
def smooth(scalars, weight=0.9):  # Weight between 0 and 1
    """Exponentially smooth a sequence of values.

    Each output is ``previous_smoothed * weight + (1 - weight) * point``,
    seeded with the first input value.

    Args:
        scalars: Iterable of numeric values (list, tuple, array, generator, ...).
        weight: Smoothing factor, intended to lie between 0 and 1. Higher
            values give more weight to the running value, i.e. a smoother curve.

    Returns:
        A list with one smoothed value per input value; ``[]`` for empty input.
    """
    # Materialise once so that empty inputs, generators and containers that do
    # not support positional ``[0]`` lookup are all handled uniformly.
    values = list(scalars)
    if not values:
        return []

    last = values[0]  # Seed the running value with the first timestep
    smoothed = []
    for point in values:
        last = last * weight + (1 - weight) * point  # Blend running value with new point
        smoothed.append(last)

    return smoothed

## Load data

In [5]:
# Input locations. NOTE(review): these are absolute paths on one machine;
# adjust them before running elsewhere.
p_pheno = '/home/harveyaa/Documents/fMRI/data/ukbb_9cohorts/pheno_01-12-21.csv'
p_ids = '/home/harveyaa/Documents/masters/neuropsych_mtl/datasets/cv_folds/hybrid'
p_conn = '/home/harveyaa/Documents/fMRI/data/ukbb_9cohorts/connectomes/'

# Cases to build datasets for. Un-comment an entry to include it.
cases = [
    'SZ',
    #'BIP',
    #'ASD',
    #'DEL22q11_2',
    #'DEL16p11_2',
    #'DUP16p11_2',
    #'DUP22q11_2',
    #'DEL1q21_1',
    #'DUP1q21_1'
]

# MTL

In [6]:
# Create datasets: one caseControlDataset per entry of `cases`, kept in the same order.
print('Creating datasets...')
data = []
for case in cases:
    print(case)
    # Alternative kept for reference:
    #data.append(balancedCaseControlDataset(case,p_ids,p_conn,format=0))
    data.append(
        caseControlDataset(
            case,
            p_pheno,
            conn_path=p_conn,
            strategy='stratified',
            type='conf',
            format=0,
        )
    )
print('Done!\n')

Creating datasets...
SZ
Done!



  if self.run_code(code, result):


In [7]:
# Peek at item 0 of the first dataset and show the shape of its X component.
X, y_dict = data[0][0]
X.shape

(58,)

In [8]:
# BALANCED TEST SETS (disabled; kept for reference as an alternative to the RANDOM TEST SETS cell)

#batch_size=1
#head=3
#encoder=3
#fold=4
#
#loss_fns = {}
#trainloaders = {}
#testloaders = {}
#decoders = {}
#for d, case in zip(data,cases):
#    train_idx, test_idx = d.split_data(fold)
#    train_d = Subset(d,train_idx)
#    test_d = Subset(d,test_idx)
#    trainloaders[case] = DataLoader(train_d, batch_size=batch_size, shuffle=True)
#    testloaders[case] = DataLoader(test_d, batch_size=batch_size, shuffle=True)
#    loss_fns[case] = nn.CrossEntropyLoss()
#    decoders[case] = eval(f'head{head}().double()')

In [9]:
# RANDOM TEST SETS

batch_size = 16
head = 4
encoder = 4

# Split data & create loaders & loss fns.
# Every dict below is keyed by case name, one entry per dataset in `data`.
loss_fns = {}
trainloaders = {}
testloaders = {}
decoders = {}
for d, case in zip(data, cases):
    # split_data returns a (train, test) pair for this dataset.
    train_d, test_d = split_data(d)
    trainloaders[case] = DataLoader(train_d, batch_size=batch_size, shuffle=True)
    # Evaluation does not benefit from shuffling; a fixed order keeps it deterministic.
    testloaders[case] = DataLoader(test_d, batch_size=batch_size, shuffle=False)
    loss_fns[case] = nn.CrossEntropyLoss()
    # Look the head class up by name (e.g. `head4`) in the notebook namespace
    # instead of eval()-ing a code string, then instantiate it in double precision.
    decoders[case] = globals()[f'head{head}']().double()

In [10]:
# Create model: shared encoder plus the per-case decoders and loss functions.
# The encoder class (e.g. `encoder4`) is looked up by name in the notebook
# namespace rather than built by eval()-ing a code string.
model = HPSModel(
    globals()[f'encoder{encoder}']().double(),
    decoders,
    loss_fns,
)

Initialized HPSModel using: cpu.



In [11]:
# Directory handed to the Trainer as `log_dir`; printed so the location shows in the output.
# NOTE(review): absolute path on one machine -- adjust before running elsewhere.
log_dir = '/home/harveyaa/Documents/masters/neuropsych_mtl/tmp'
print(log_dir)

/home/harveyaa/Documents/masters/neuropsych_mtl/tmp


In [12]:
# Training hyperparameters
num_epochs = 100
lr = 1e-3

# Create optimizer & trainer.
# No LR scheduler is active; the earlier scheduler experiments are kept as comments.
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
#scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)
#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.05)

#trainer = Trainer(optimizer,lr_scheduler=scheduler,num_epochs=num_epochs,log_dir=log_dir)
trainer = Trainer(
    optimizer,
    num_epochs=num_epochs,
    log_dir=log_dir,
)

In [13]:
# Train model on the per-case train loaders, passing the test loaders alongside.
trainer.fit(model, trainloaders, testloaders)

Epoch 0: 100%|██████████| 29/29 [00:00<00:00, 33.84it/s]
Epoch 1: 100%|██████████| 29/29 [00:00<00:00, 32.55it/s]
Epoch 2: 100%|██████████| 29/29 [00:00<00:00, 35.04it/s]
Epoch 3: 100%|██████████| 29/29 [00:00<00:00, 36.81it/s]
Epoch 4: 100%|██████████| 29/29 [00:00<00:00, 35.88it/s]
Epoch 5: 100%|██████████| 29/29 [00:00<00:00, 37.31it/s]
Epoch 6: 100%|██████████| 29/29 [00:00<00:00, 36.68it/s]
Epoch 7: 100%|██████████| 29/29 [00:00<00:00, 38.23it/s]
Epoch 8: 100%|██████████| 29/29 [00:00<00:00, 39.84it/s]
Epoch 9: 100%|██████████| 29/29 [00:00<00:00, 39.78it/s]
Epoch 10: 100%|██████████| 29/29 [00:00<00:00, 37.14it/s]
Epoch 11: 100%|██████████| 29/29 [00:00<00:00, 38.28it/s]
Epoch 12: 100%|██████████| 29/29 [00:00<00:00, 37.23it/s]
Epoch 13: 100%|██████████| 29/29 [00:00<00:00, 36.33it/s]
Epoch 14: 100%|██████████| 29/29 [00:00<00:00, 36.14it/s]
Epoch 15: 100%|██████████| 29/29 [00:00<00:00, 36.75it/s]
Epoch 16: 100%|██████████| 29/29 [00:00<00:00, 38.88it/s]
Epoch 17: 100%|█████████

In [11]:
# Numbers noted from earlier runs (presumably accuracy per case -- confirm).
# BALANCED
# SZ 51.59
# BIP 50.0
# ASD 47.3

# RANDOM
# SZ 58.59
# BIP 71.875
# ASD 49.74

# Evaluate at end: print accuracy and loss for every key returned by model.score.
metrics = model.score(testloaders)
for key in metrics:
    print(f'\n{key}')  # blank line, then the key on its own line
    print('Accuracy: ', metrics[key]['accuracy'])
    print('Loss: ', metrics[key]['loss'])
print()


SZ
Accuracy:  53.98230088495575
Loss:  0.05245458040815248



In [23]:
# Scratch check: padding a 1-D vector by 2 on each side adds 4 elements in total.
# NOTE(review): 58 matches the X.shape shown earlier; 2080 is presumably the
# connectome feature count -- confirm.
np.pad(np.random.randn(2080 + 58), 2).shape

(2142,)