In [1]:
# Load BSNIP

import pickle
import numpy as np
from pathlib import Path

# Root directory of the BSNIP cohort (demographics pickle + per-subject FC files).
bsnipdir = '/home/anton/Documents/Tulane/Research/ImageNomer/data/anton/cohorts/BSNIP'

# Use a context manager so the file handle is closed as soon as loading is done.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open(f'{bsnipdir}/demographics.pkl', 'rb') as fh:
    bsnipdemo = pickle.load(fh)

# Integer code assigned to each DXGROUP_2 value that is kept.
grpdict = dict(SZP=0,SZR=1,BPP=2,BPR=3,SADP=4,SADR=5,NC=6)
bsnipgrp = []
bsnipfc = []

for sub in bsnipdemo['DXGROUP_2']:
    # The group lookup does not depend on the task, so do it once per subject.
    dx = bsnipdemo['DXGROUP_2'][sub]
    if dx not in grpdict:
        # Subjects whose group value has no code in grpdict are dropped.
        continue
    for task in ['unk']:
        f = f'{bsnipdir}/fc/{sub}_task-{task}_fc.npy'
        if not Path(f).exists():
            # No FC file on disk for this subject/task: skip it.
            continue
        p = np.load(f)
        g = grpdict[dx]
        bsnipfc.append(p)
        bsnipgrp.append(g)

# One entry per loaded FC file: group codes and the stacked FC arrays.
bsnipgrp = np.stack(bsnipgrp)
bsnipfc = np.stack(bsnipfc)

print(bsnipgrp.shape)
print(bsnipfc.shape)

(1246,)
(1246, 34716)


In [2]:
# Bring in (new) PNC cohort

import pickle
from pathlib import Path
import numpy as np

# Root directory of the (new) PNC cohort.
newdir = '/home/anton/Documents/Tulane/Research/ImageNomer/data/anton/cohorts/PNC'

# Use a context manager so the file handle is closed as soon as loading is done.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open(f'{newdir}/demographics.pkl', 'rb') as fh:
    newdemo = pickle.load(fh)

newfc = []

# The keys of the 'age_at_cnb' field are used as the subject ids.
for sub in newdemo['age_at_cnb']:
    for task in ['rest', 'nback', 'emoid']:
        f = f'{newdir}/fc/{sub}_task-{task}_fc.npy'
        if not Path(f).exists():
            # Not every subject has every task on disk: skip missing files.
            continue
        p = np.load(f)
        newfc.append(p)

# One row per (subject, task) file that was found.
newfc = np.stack(newfc)
# Every row gets the code 6 (the value grpdict uses for 'NC' in the BSNIP cell).
# Note this is a float array, since it is built from np.ones.
newgrp = 6*np.ones(newfc.shape[0])

print(newgrp.shape)
print(newfc.shape)

(4343,)
(4343, 34716)


In [28]:
# Bring in MCIC data

import pickle
import numpy as np

# Root directory of the MCIC cohort.
mcicdir = '/home/anton/Documents/Tulane/Research/ImageNomer/data/anton/cohorts/MCIC'

# Use a context manager so the file handle is closed as soon as loading is done.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open(f'{mcicdir}/demographics.pkl', 'rb') as fh:
    mcicdemo = pickle.load(fh)

mcicfc = []
mcicgrp = []

# The keys of the 'Neuroleptic_Naive' field are used as the subject ids.
for sub in mcicdemo['Neuroleptic_Naive']:
    try:
        p = np.load(f'{mcicdir}/fc/{sub}_task-combined_fc.npy')
    except FileNotFoundError:
        # Only a missing FC file is a reason to skip a subject. Any other
        # failure (corrupt file, interrupt, ...) should surface instead of
        # being silently swallowed.
        continue
    d = mcicdemo['Neuroleptic_Naive'][sub] == 'T'
    # 'T' is coded 6 and anything else 0; these match the NC and SZP codes of
    # grpdict in the BSNIP cell.
    # NOTE(review): confirm that Neuroleptic_Naive == 'T' really identifies
    # controls in this cohort.
    mcicgrp.append(6 if d else 0)
    mcicfc.append(p)

mcicfc = np.stack(mcicfc)
mcicgrp = np.array(mcicgrp)

print(mcicfc.shape)
print(mcicgrp.shape)

(210, 34716)
(210,)


In [43]:
# Check adding relatives to training set improves AUC without improving accuracy on test

import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, confusion_matrix, ConfusionMatrixDisplay

import sys

# Directory added to the import path below so that `latsim` can be imported
# (presumably a local checkout of the LatentSimilarity project).
latsimdir = '/home/anton/Documents/Tulane/Research/LatentSimilarity'

# The membership check keeps re-running this cell from appending the same
# entry to sys.path more than once.
if latsimdir not in sys.path:
    sys.path.append(latsimdir)
    
from latsim import LatSimClf

def cat(lst):
    """Join the arrays in `lst` end-to-end along their first axis.

    Thin wrapper around np.concatenate; returns the joined array.
    """
    joined = np.concatenate(lst, axis=0)
    return joined

def select(fc, labs, lab, pcnt):
    """Pick the rows of `fc` labelled `lab`, shuffle them, and optionally split.

    Parameters:
        fc:   array indexed along its first axis.
        labs: array of labels compared element-wise against `lab`.
        lab:  label value whose rows are kept.
        pcnt: 1 to get all shuffled rows back as a single array; otherwise the
              fraction of rows that goes into the first part of the split.

    Returns:
        A single array when `pcnt == 1`, else a `(first, rest)` tuple where
        `first` holds the first `int(n * pcnt)` shuffled rows.

    The shuffle uses NumPy's global random state.
    """
    matching = fc[np.where(labs == lab)[0]]
    count = matching.shape[0]
    # Draw the permutation before looking at pcnt so the global RNG is
    # advanced in both modes.
    shuffled = matching[np.random.permutation(count)]
    if pcnt == 1:
        return shuffled
    cut = int(count * pcnt)
    first, rest = shuffled[:cut], shuffled[cut:]
    return first, rest

def combine(grp1, grp2, n1=None, n2=None):
    """Build a feature array and 0/1 label vector from two lists of arrays.

    Parameters:
        grp1: list of arrays whose rows are labelled 0.
        grp2: list of arrays whose rows are labelled 1.
        n1:   if given, keep at most the first `n1` rows of each array in grp1.
        n2:   if given, keep at most the first `n2` rows of each array in grp2.

    Returns:
        (x, y): `x` is all kept grp1 rows followed by all kept grp2 rows,
        joined along the first axis; `y` is a float vector with one 0 per
        grp1 row followed by one 1 per grp2 row.

    The label counts are taken from the arrays after truncation, so `x` and
    `y` always have the same length even when an array has fewer than
    `n1` / `n2` rows.
    """
    if n1 is not None:
        grp1 = [a[:n1] for a in grp1]
    if n2 is not None:
        grp2 = [b[:n2] for b in grp2]
    # Count the rows that are actually present rather than assuming every
    # array supplied the full n1 / n2 rows.
    an = sum(a.shape[0] for a in grp1)
    bn = sum(b.shape[0] for b in grp2)
    x = np.concatenate([np.concatenate(grp1), np.concatenate(grp2)])
    y = np.concatenate([np.zeros(an), np.ones(bn)])
    return x, y

# ROC AUC of each random split, collected for the summary printed at the end.
rocs = []

# Repeat the random split / fit / evaluate cycle 20 times.
# NOTE(review): select() shuffles with NumPy's global RNG and no seed is set
# in this cell, so the printed numbers change from run to run.
for _ in range(20):
    # Random splits of the BSNIP arrays by group code (see grpdict).
    # NOTE(review): in this version of the cell none of the BSNIP splits are
    # passed to the classifier; only the MCIC splits below are used.
    sztr, szt = select(bsnipfc, bsnipgrp, 0, 0.5)
    szr = select(bsnipfc, bsnipgrp, 1, 1)
    bpp = select(bsnipfc, bsnipgrp, 2, 1)
    bpr = select(bsnipfc, bsnipgrp, 3, 1)
    sadr = select(bsnipfc, bsnipgrp, 5, 1)
    # Disabled: 50/50 split of the PNC rows.
#     pnctr, pnct = select(newfc, newgrp, 6, 0.5)
    # 50/50 train/test split of each MCIC group (codes 0 and 6).
    mcsztr, mcszt = select(mcicfc, mcicgrp, 0, 0.5)
    mcnctr, mcnct = select(mcicfc, mcicgrp, 6, 0.5)
    nctr, nct = select(bsnipfc, bsnipgrp, 6, 0.5)

    # Train and test sets: up to 50 rows of MCIC code 0 (label 0) followed by
    # up to 50 rows of MCIC code 6 (label 1).
    xtr, ytr = combine([mcsztr], [mcnctr], 50, 50)
    xt, yt = combine([mcszt], [mcnct], 50, 50)

    # Shapes of the train/test features and labels for this split.
    print([a.shape for a in [xtr, xt, ytr, yt]])

    # Disabled alternative classifier from the latsim package.
#     clf = LatSimClf().fit(xtr, ytr) 
    clf = LogisticRegression(C=100, max_iter=1000).fit(xtr, ytr)
    yhat = clf.predict(xt)
    yprob = clf.predict_proba(xt)
    # Overall accuracy on the test rows.
    print(np.mean(yt == yhat))
    # Column 1 of predict_proba is taken as the score for label 1.
    r = roc_auc_score(yt, yprob[:,1])
    rocs.append(r)
    print(r)
    # Fraction of label-0 test rows predicted as 0, then the same for label 1.
    print(np.mean(yhat[yt==0] == 0))
    print(np.mean(yhat[yt==1] == 1))
    print('---')
    
# Mean and standard deviation of the AUC over the 20 splits.
print(np.mean(rocs))
print(np.std(rocs))

[(100, 34716), (100, 34716), (100,), (100,)]
0.67
0.7764
0.7
0.64
---
[(100, 34716), (100, 34716), (100,), (100,)]
0.68
0.7456
0.7
0.66
---
[(100, 34716), (100, 34716), (100,), (100,)]
0.65
0.7188
0.58
0.72
---
[(100, 34716), (100, 34716), (100,), (100,)]
0.64
0.7372
0.52
0.76
---
[(100, 34716), (100, 34716), (100,), (100,)]
0.61
0.7496
0.58
0.64
---
[(100, 34716), (100, 34716), (100,), (100,)]
0.64
0.7575999999999999
0.66
0.62
---
[(100, 34716), (100, 34716), (100,), (100,)]
0.67
0.7452
0.6
0.74
---
[(100, 34716), (100, 34716), (100,), (100,)]
0.74
0.8036000000000001
0.64
0.84
---
[(100, 34716), (100, 34716), (100,), (100,)]
0.69
0.7544
0.78
0.6
---
[(100, 34716), (100, 34716), (100,), (100,)]
0.71
0.752
0.62
0.8
---
[(100, 34716), (100, 34716), (100,), (100,)]
0.63
0.7040000000000001
0.66
0.6
---
[(100, 34716), (100, 34716), (100,), (100,)]
0.71
0.8063999999999999
0.6
0.82
---
[(100, 34716), (100, 34716), (100,), (100,)]
0.67
0.7652000000000001
0.72
0.62
---
[(100, 34716), (100, 3471

In [46]:
# Bring in (old fmriprep) PNC cohort

# Root directory of the PNC cohort processed with the old fmriprep run.
olddir = '/home/anton/Documents/Tulane/Research/ImageNomer/data/anton/cohorts/test'

# Use a context manager so the file handle is closed as soon as loading is done.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open(f'{olddir}/demographics.pkl', 'rb') as fh:
    olddemo = pickle.load(fh)

oldfc = []

# The keys of the 'age' field are used as the subject ids.
for sub in olddemo['age']:
    for task in ['rest', 'nback', 'emoid']:
        f = f'{olddir}/fc/{sub}_task-{task}_fc.npy'
        if not Path(f).exists():
            # Not every subject has every task on disk: skip missing files.
            continue
        p = np.load(f)
        oldfc.append(p)

# One row per (subject, task) file that was found.
oldfc = np.stack(oldfc)
# Every row gets the code 6 (the value grpdict uses for 'NC' in the BSNIP cell).
# Note this is a float array, since it is built from np.ones.
oldgrp = 6*np.ones(oldfc.shape[0])

print(oldgrp.shape)
print(oldfc.shape)

(2490,)
(2490, 34716)
