In [1]:
import os
import tempfile
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary

from miniMTL.datasets import *
from miniMTL.models import *
from miniMTL.util import *
from miniMTL.training import *
from miniMTL.hps import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class encoder3(nn.Module):
    def __init__(self,dim=58,width=10):
        super().__init__()
        # in_channels, out_channels
        self.fc1 = nn.Linear(dim, 256)
        self.batch1 = nn.BatchNorm1d(256)
        self.fc2 = nn.Linear(256, 16)
        self.batch2 = nn.BatchNorm1d(16)

        self.dropout = nn.Dropout()
        self.leaky = nn.LeakyReLU()
    
    def forward(self,x):
        x = self.dropout(self.leaky(self.fc1(x)))
        x = self.batch1(x)
        x = self.dropout(self.leaky(self.fc2(x)))
        x = self.batch2(x)
        return x


class head3(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc3 = nn.Linear(16,2)
        #self.batch3 = nn.BatchNorm1d(16)
        #self.fc4 = nn.Linear(width,2)

        self.dropout = nn.Dropout()
        self.leaky = nn.LeakyReLU()
        #self.softmax = nn.Softmax(dim=1)
    
    def forward(self,x):
        x = self.dropout(self.leaky(self.fc3(x)))
        #x = self.batch3(x)
        #x = self.dropout(F.relu(self.fc4(x)))
        #x = self.softmax(x)
        return x

## Load data

In [3]:
p_pheno = '/home/harveyaa/Documents/fMRI/data/ukbb_9cohorts/pheno_01-12-21.csv'
p_ids = '/home/harveyaa/Documents/masters/neuropsych_mtl/datasets/cv_folds/hybrid'
p_conn = '/home/harveyaa/Documents/fMRI/data/ukbb_9cohorts/connectomes/'

cases = ['SZ',
        #'BIP',
        #'ASD',
        'DEL22q11_2',
        #'DEL16p11_2',
        #'DUP16p11_2',
        #'DUP22q11_2',
        #'DEL1q21_1',
        #'DUP1q21_1'
        ]

# Investigate 22q

In [68]:
conn = pd.read_csv('/home/harveyaa/Documents/fMRI/data/ukbb_9cohorts/connectomes_01-12-21.csv',index_col=0)

In [73]:
df_22q = pd.read_csv(os.path.join(p_ids,'DEL22q11_2.csv'),index_col=0)

In [75]:
conn = conn[conn.index.isin(df_22q.index)]

In [99]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

clf = SVC(C=100)
fold = 4

train_idx = df_22q[df_22q[f'fold_{fold}'] == 0].index
test_idx = df_22q[df_22q[f'fold_{fold}'] == 1].index

X_train = conn[conn.index.isin(train_idx)].values
X_test = conn[conn.index.isin(test_idx)].values
y_train = df_22q[df_22q.index.isin(train_idx)]['DEL22q11_2'].values.reshape(-1,1)
y_test = df_22q[df_22q.index.isin(test_idx)]['DEL22q11_2'].values.reshape(-1,1)

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

clf.fit(X_train,y_train)
pred = clf.predict(X_test)
accuracy_score(y_test,pred)

(58, 2080)
(28, 2080)
(58, 1)
(28, 1)


  return f(*args, **kwargs)


0.6071428571428571

In [100]:
pheno = pd.read_csv(p_pheno,index_col=0)

  interactivity=interactivity, compiler=compiler, result=result)


In [102]:
conf = ['AGE',
            'SEX',
            'SITE',
            'mean_conn',
            'FD_scrubbed']
case = 'DEL22q11_2'

# PLOT TEST SET
#fig, ax = plt.subplots(len(conf),5,figsize=(15,12))
#for i,c in enumerate(conf):
#        for fold in range(5):
#                #ids = pd.read_csv(os.path.join(temp_dir.name,f"{case}_test_set_{fold}.txt"),header=None)
#                ids = df_22q[df_22q[f'fold_{fold}']==1].index
#                
#                sns.histplot(x=c,data=pheno[pheno.index.isin(ids)],hue=case,bins=25,ax=ax[i,fold])
#                if i == 0:
#                        ax[i,fold].set_title(f'fold {fold}')
#                if fold == 0:
#                        ax[i,fold].set_xlabel('')
#                        ax[i,fold].set_ylabel(c)
#                else:
#                        ax[i,fold].set_xlabel('')
#                        ax[i,fold].set_ylabel('')
#                        ax[i,fold].set_yticklabels([])
#plt.tight_layout()
#plt.subplots_adjust(wspace=0.1,hspace=0.2)
#plt.savefig(os.path.join(args.p_out,f"{case}_test.png"),dpi=300

# PLOT TRAIN SET
#fig, ax = plt.subplots(len(conf),5,figsize=(15,12))
#for i,c in enumerate(conf):
#        for fold in range(5):
#                ids_train = ids = df_22q[df_22q[f'fold_{fold}']==0].index
#                
#                sns.histplot(x=c,data=pheno[pheno.index.isin(ids_train)],hue=case,bins=25,ax=ax[i,fold])
#                if i == 0:
#                        ax[i,fold].set_title(f'fold {fold}')
#                if fold == 0:
#                        ax[i,fold].set_xlabel('')
#                        ax[i,fold].set_ylabel(c)
#                else:
#                        ax[i,fold].set_xlabel('')
#                        ax[i,fold].set_ylabel('')
#                        ax[i,fold].set_yticklabels([])
#plt.tight_layout()
#plt.subplots_adjust(wspace=0.1,hspace=0.2)

# MTL

In [4]:
# Create datasets
print('Creating datasets...')
data = []
for case in cases:
    print(case)
    data.append(balancedCaseControlDataset(case,p_ids,p_conn,format=0))
print('Done!\n')

Creating datasets...
SZ
DEL22q11_2
Done!



In [5]:
# BALANCED TEST SETS

batch_size=4
#head=0
#encoder=0
fold=0
width=100

loss_fns = {}
trainloaders = {}
testloaders = {}
decoders = {}
for d, case in zip(data,cases):
    train_idx, test_idx = d.split_data(fold)
    train_d = Subset(d,train_idx)
    test_d = Subset(d,test_idx)
    trainloaders[case] = DataLoader(train_d, batch_size=batch_size, shuffle=True)
    testloaders[case] = DataLoader(test_d, batch_size=batch_size, shuffle=True)
    loss_fns[case] = nn.CrossEntropyLoss()
    #decoders[case] = eval(f'head{head}().double()')
    decoders[case] = head3().double()

In [6]:
# RANDOM TEST SETS

#batch_size=4
#head=0
#encoder=0
#
## Split data & create loaders & loss fns
#loss_fns = {}
#trainloaders = {}
#testloaders = {}
#decoders = {}
#for d, case in zip(data,cases):
#    train_d, test_d = split_data(d,seed=888)
#    trainloaders[case] = DataLoader(train_d, batch_size=batch_size, shuffle=True)
#    testloaders[case] = DataLoader(test_d, batch_size=batch_size, shuffle=True)
#    loss_fns[case] = nn.CrossEntropyLoss()
#    decoders[case] = eval(f'head{head}().double()')
#    #decoders[case] = head3().double()

In [7]:
# Create model
model = HPSModel(#eval(f'encoder{encoder}().double()'),
                encoder3(dim=2080).double(),
                decoders,
                loss_fns)

Initialized HPSModel using: cpu.



In [8]:
list(model.parameters())[0].grad

In [9]:
#params_pre = []
#for p in model.parameters():
#    params_pre.append(torch.tensor(p[0]))
#params_pre

In [10]:
log_dir = '/home/harveyaa/Documents/masters/neuropsych_mtl/tmp'
print(log_dir)

/home/harveyaa/Documents/masters/neuropsych_mtl/tmp


In [12]:
num_epochs=100
lr = 0.001

# Create optimizer & trainer
optimizer = torch.optim.Adamax(model.parameters(), lr=lr)
trainer = Trainer(optimizer,num_epochs=num_epochs,log_dir=log_dir)

In [13]:
# Train model
trainer.fit(model,trainloaders,testloaders,shuffle=False)

Epoch 0: 100%|██████████| 142/142 [00:11<00:00, 12.51it/s]
Epoch 1: 100%|██████████| 142/142 [00:11<00:00, 12.75it/s]
Epoch 2: 100%|██████████| 142/142 [00:11<00:00, 12.77it/s]
Epoch 3: 100%|██████████| 142/142 [00:11<00:00, 12.88it/s]
Epoch 4: 100%|██████████| 142/142 [00:10<00:00, 13.07it/s]
Epoch 5: 100%|██████████| 142/142 [00:11<00:00, 12.43it/s]
Epoch 6: 100%|██████████| 142/142 [00:11<00:00, 12.54it/s]
Epoch 7: 100%|██████████| 142/142 [00:17<00:00,  8.25it/s]
Epoch 8: 100%|██████████| 142/142 [00:18<00:00,  7.76it/s]
Epoch 9: 100%|██████████| 142/142 [00:12<00:00, 11.31it/s]
Epoch 10: 100%|██████████| 142/142 [00:12<00:00, 11.34it/s]
Epoch 11: 100%|██████████| 142/142 [00:12<00:00, 10.97it/s]
Epoch 12: 100%|██████████| 142/142 [00:14<00:00, 10.01it/s]
Epoch 13: 100%|██████████| 142/142 [00:12<00:00, 10.95it/s]
Epoch 14: 100%|██████████| 142/142 [00:12<00:00, 11.47it/s]
Epoch 15: 100%|██████████| 142/142 [00:12<00:00, 11.27it/s]
Epoch 16: 100%|██████████| 142/142 [00:12<00:00, 1

In [22]:
#params_post = []
#for p in model.parameters():
#    params_post.append(p[0])
#params_post

In [159]:
print(len(params_pre))
for i in range(len(params_pre)):
    print((params_pre[i]==params_post[i]).sum().item())
    print(params_pre[i].size())
    print()

10
0
torch.Size([2080])

0
torch.Size([])

0
torch.Size([])

0
torch.Size([])

0
torch.Size([256])

0
torch.Size([])

0
torch.Size([])

0
torch.Size([])

0
torch.Size([10])

0
torch.Size([])



In [160]:
# BALANCED
# SZ 51.59
# BIP 50.0
# ASD 47.3

# RANDOM
# SZ 58.59
# BIP 71.875
# ASD 49.74

# Evaluate at end
metrics = model.score(testloaders)
for key in metrics.keys():
    print()
    print(key)
    print('Accuracy: ', metrics[key]['accuracy'])
    print('Loss: ', metrics[key]['loss'])
print()


SZ
Accuracy:  51.587301587301596
Loss:  0.04392023539256849



In [161]:
for X, Y_dict in iter(trainloaders['SZ']):
    pred = model.forward(X,['SZ'])['SZ']
    print(pred.argmax(1))
    print(Y_dict['SZ'])
    print()

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
tensor([1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1])

tensor([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])
tensor([0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0])

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
tensor([0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0])

tensor([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0])
tensor([1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1])

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
tensor([0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0])

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
tensor([1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1])

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
tensor([0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1])

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
tensor([1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1])

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
tensor([1, 1, 0, 0, 1, 

In [136]:
X, Y_dict = next(iter(trainloaders['SZ']))
X

tensor([[ 0.2205,  0.7734,  0.3376,  ...,  0.4517,  0.7568,  0.2864],
        [ 0.3252,  0.7770,  0.4318,  ...,  0.5235,  0.4512,  0.2063],
        [ 0.2663,  1.0461,  0.5479,  ...,  0.0115, -0.2313,  0.2665],
        ...,
        [ 0.2632,  0.6835,  0.2740,  ...,  0.7039,  0.3534,  0.2338],
        [ 0.1444,  0.4784,  0.2277,  ...,  0.2569,  0.5483,  0.2190],
        [ 0.2306,  0.6204,  0.2626,  ...,  0.4079,  0.7664,  0.2759]],
       dtype=torch.float64)

In [142]:
dropout = nn.Dropout(p=0.001)
leaky = nn.LeakyReLU()

#X_1 = dropout(F.relu(model.encoder.fc1(X)))
#X_2 = dropout(F.relu(model.encoder.fc2(X_1)))
#X_3 = dropout(F.relu(model.decoders['SZ'].fc3(X_2)))

X_1 = leaky(model.encoder.fc1(X))
X_2 = leaky(model.encoder.fc2(X_1))
X_3 = leaky(model.decoders['SZ'].fc3(X_2))
X_3

tensor([[-313.3458, -115.3427],
        [-251.3480,  -92.2653],
        [-128.5347,  -47.3491],
        [-154.7057,  -56.9959],
        [-277.0331, -102.1275],
        [-267.5116,  -96.1504],
        [-110.4147,  -40.1776],
        [-194.1211,  -71.5230],
        [-262.9967,  -96.4557],
        [-258.9930,  -96.4075],
        [-230.1875,  -85.4877],
        [-307.7387, -112.7890],
        [ -94.0132,  -34.1477],
        [-272.7833, -100.4545],
        [-265.4162,  -97.0490],
        [-299.4902, -109.9600]], dtype=torch.float64,
       grad_fn=<LeakyReluBackward0>)

In [14]:
import shutil

shutil.rmtree(log_dir)