# Redo FC-based prediction: 20 random train/test splits for each of age, sex, and WRAT, with TR = 3 s

In [1]:
# Using newly preprocessed subjects

import pickle

# Locations of the two pickled inputs: subject metadata and per-paradigm time series.
metadictname = '/home/anton/Documents/Tulane/Research/PNC_Good/PNC_agesexwrat.pkl'
alltsname = '/home/anton/Documents/Tulane/Research/PNC_Good/PNC_PowerTS_float2.pkl'

# NOTE(review): pickle.load executes arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open(metadictname, 'rb') as f:
    metadict = pickle.load(f)

with open(alltsname, 'rb') as f:
    allts = pickle.load(f)
    
# Show the top-level keys of both dictionaries as a sanity check on the load.
print(list(metadict.keys()))
print(list(allts.keys()))
print('Complete')

['age', 'sex', 'wrat', 'missingage', 'missingsex', 'missingwrat', 'failedqc']
['emoid', 'nback', 'rest']
Complete


In [2]:
'''
Get subjects that have all tasks and paras specified
Functions for creating independent and response variables
'''

import numpy as np

def get_subs(allts, metadict, tasks, paras):
    """Return the subject ids that have every requested paradigm and task value.

    Parameters
    ----------
    allts : dict
        Maps paradigm name -> dict keyed by subject label. The numeric id is
        taken from the label by dropping its first four characters
        (get_X builds the labels as f'sub-{id}').
    metadict : dict
        Maps task name -> dict keyed by integer subject id, and contains a
        'failedqc' iterable of subject labels to exclude.
    tasks : sequence of str
        Keys of ``metadict`` a subject must appear in.
    paras : sequence of str
        Keys of ``allts`` a subject must appear in.

    Returns
    -------
    list of int
        Ids present in all paradigms and all tasks, minus QC failures.
        The order is the iteration order of a set.

    Raises
    ------
    ValueError
        If ``tasks`` or ``paras`` is empty.
    """
    tasks = list(tasks)
    paras = list(paras)
    if not tasks or not paras:
        raise ValueError('get_subs needs at least one task and one paradigm')

    # Subject ids available for each paradigm ('sub-123' -> 123).
    parasets = [{int(sub[4:]) for sub in allts[para].keys()} for para in paras]
    # Subject ids available for each task (already keyed by id).
    tasksets = [set(metadict[task].keys()) for task in tasks]

    # A subject qualifies only if it is in EVERY set. The previous loop
    # intersected each task set with the paradigm set instead of the running
    # task set, which silently dropped all but the last task's constraint.
    allsubs = set.intersection(*parasets, *tasksets)

    # Remove QC failures. Entries that are absent, or that cannot be parsed as
    # a subject label, are skipped (best effort, as before) without swallowing
    # unrelated exceptions.
    for badsub in metadict['failedqc']:
        try:
            allsubs.discard(int(badsub[4:]))
        except (TypeError, ValueError):
            continue
    return list(allsubs)

def get_X(allts, paras, subs):
    """Collect the time series of ``subs`` for each paradigm.

    Returns a list with one array per entry of ``paras``; each array stacks
    ``allts[para]['sub-<id>']`` along a new leading axis, in the order given
    by ``subs``.
    """
    return [
        np.stack([allts[para][f'sub-{sub}'] for sub in subs])
        for para in paras
    ]

def get_y(metadict, tasks, subs):
    """Build one response array per task, with rows ordered like ``subs``.

    'age' and 'wrat' give a 1-D array of the stored values. 'sex' gives a
    two-column array: column 0 is 1 where the stored value equals 'M',
    column 1 is its complement. Any other task name is silently skipped.
    """
    responses = []
    for task in tasks:
        if task in ('age', 'wrat'):
            values = np.array([metadict[task][sub] for sub in subs])
            responses.append(values)
        elif task == 'sex':
            is_male = np.array([metadict[task][sub] == 'M' for sub in subs])
            # 1 - bool array promotes to integers, so the stacked result is integer-typed.
            onehot = np.stack([is_male, 1 - is_male], axis=1)
            responses.append(onehot)
    return responses

# Paradigm order matters: later cells index X and p by position (0=rest, 1=nback, 2=emoid).
paradigms = ['rest', 'nback', 'emoid']

# Subjects that have a WRAT score and time series for all three paradigms.
subs = get_subs(allts, metadict, ['wrat'], paradigms)
print(len(subs))

# One stacked time-series array per paradigm.
X = get_X(allts, paradigms, subs)
print(X[0].shape)

830
(830, 264, 124)


In [29]:
# TS to condensed FC

from scipy import signal

def butter_bandpass(cutoff, fs, order=5):
    """Design a digital Butterworth band-pass filter.

    ``cutoff`` holds the (low, high) band edges in the same units as the
    sampling rate ``fs``; both are normalised by the Nyquist frequency
    before being handed to ``signal.butter``. Returns the ``(b, a)``
    coefficient arrays.
    """
    nyquist = 0.5 * fs
    low = cutoff[0] / nyquist
    high = cutoff[1] / nyquist
    return signal.butter(order, [low, high], btype='band', analog=False)

def butter_bandpass_filter(data, cutoff, fs, order=5):
    """Band-pass filter ``data`` with a forward-backward Butterworth filter.

    The coefficients come from ``butter_bandpass(cutoff, fs, order)`` and are
    applied with ``signal.filtfilt`` using its default axis.
    """
    coeffs = butter_bandpass(cutoff, fs, order=order)
    return signal.filtfilt(coeffs[0], coeffs[1], data)

tr = 3  # repetition time; the filter below uses 1/tr as the sampling rate

def filter_design_ts(X):
    """Band-pass filter every entry along the first axis of ``X``.

    Each ``X[i]`` is filtered to the 0.01-0.15 band at a sampling rate of
    ``1/tr`` and the results are stacked back along a new leading axis.
    """
    band = [0.01, 0.15]
    filtered = [
        butter_bandpass_filter(X[idx], band, 1/tr)
        for idx in range(X.shape[0])
    ]
    return np.stack(filtered)

def ts_to_flat_fc(X):
    """Return the strict upper triangle of the correlation matrix of ``X``.

    Rows of ``X`` are treated as variables (``np.corrcoef`` default). The
    result is a 1-D array of the pairwise correlations, diagonal excluded.
    """
    corr = np.corrcoef(X)
    rows, cols = np.triu_indices(corr.shape[1], k=1)
    return corr[rows, cols]

# For each paradigm: band-pass filter every subject's time series, then turn
# each one into its flattened upper-triangular FC vector (one row per subject).
p = [np.stack(list(map(ts_to_flat_fc, filter_design_ts(Xp)))) for Xp in X]
print(p[0].shape)

(830, 34716)


In [33]:
# Age wrat prediction

import torch
import torch.nn as nn

# Least-squares regression of a continuous target on flattened FC, repeated
# over random train/test splits; the per-split weights are pickled and the
# mean test RMSE is printed at the end.

mseLoss = nn.MSELoss()

modidx = 2      # position of the paradigm in p; must correspond to `mod`
mod = 'emoid'   # paradigm name used in file names and descriptions
task = "age"
nsplits = 20    # number of random splits (also the divisor for the mean)
ntrain = 700    # subjects used for fitting; the rest form the test set
sm=0

# Loop-invariant data: move features and targets to the GPU once.
xall = torch.from_numpy(p[modidx]).float().cuda()
yall = torch.from_numpy(get_y(metadict, [task], subs)[0]).float().cuda()

def toDict(w, acc):
    """Package weights, the split's subject ids and a description for saving.

    Reads the current split from the globals ``idcs`` and ``ntrain``.
    """
    dct = dict(w=w.detach().cpu().numpy(), 
               trsubs=sorted([subs[i] for i in idcs[:ntrain].tolist()]),
               tsubs=sorted([subs[i] for i in idcs[ntrain:].tolist()]),
               desc=f"Least squares FC {task} {mod} rmse: {float(acc)}")
    return dct

def save(dct, dr, idx):
    """Pickle ``dct`` to <base>/<dr>/<mod><idx>.pkl (the directory must exist)."""
    base = f"/home/anton/Documents/Tulane/Research/ImageNomer/data/anton/cohorts/test/weights"
    with open(f"{base}/{dr}/{mod}{idx}.pkl", 'wb') as f:
        pickle.dump(dct, f)

for ii in range(nsplits):

    # Fresh random permutation of subjects for this split.
    idcs = torch.randperm(p[modidx].shape[0])

    x = xall[idcs]
    xtr = x[:ntrain]
    xt = x[ntrain:]

    y = yall[idcs]
    ytr = y[:ntrain]
    yt = y[ntrain:]
    # Centre both sets with the TRAINING mean; the model has no intercept.
    mu = torch.mean(ytr)
    ytr = ytr - mu
    yt = yt - mu

    # NOTE(review): xtr has far fewer rows (ntrain) than columns (FC edges),
    # so the system is underdetermined -- confirm the CUDA lstsq driver
    # returns the intended solution in that regime.
    w, _, _, _ = torch.linalg.lstsq(xtr, ytr)
    yhat = xt@w
    acc = mseLoss(yhat, yt)**0.5   # test RMSE

    print(acc)
    sm += acc/nsplits
    save(toDict(w,acc), f'{task}_mean_zero', ii)
    print(f'Done {ii}')
    
print('---')
print(sm)

tensor(24.3337, device='cuda:0')
Done 0
tensor(27.7029, device='cuda:0')
Done 1
tensor(27.0332, device='cuda:0')
Done 2
tensor(23.8629, device='cuda:0')
Done 3
tensor(26.9305, device='cuda:0')
Done 4
tensor(27.1087, device='cuda:0')
Done 5
tensor(25.4601, device='cuda:0')
Done 6
tensor(28.2102, device='cuda:0')
Done 7
tensor(24.5468, device='cuda:0')
Done 8
tensor(22.8939, device='cuda:0')
Done 9
tensor(24.0622, device='cuda:0')
Done 10
tensor(26.3964, device='cuda:0')
Done 11
tensor(28.3612, device='cuda:0')
Done 12
tensor(26.3250, device='cuda:0')
Done 13
tensor(27.2255, device='cuda:0')
Done 14
tensor(23.1304, device='cuda:0')
Done 15
tensor(28.2594, device='cuda:0')
Done 16
tensor(27.0746, device='cuda:0')
Done 17
tensor(27.1884, device='cuda:0')
Done 18
tensor(25.4959, device='cuda:0')
Done 19
---
tensor(26.0801, device='cuda:0')


In [22]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

# Logistic-regression classification of sex from flattened FC, repeated over
# random train/test splits; per-split coefficients are pickled and the mean
# test accuracy is printed at the end.

modidx = 2      # position of the paradigm in p; must correspond to `mod`
mod = 'emoid'   # paradigm name used in file names and descriptions
task = "sex"
nsplits = 20    # number of random splits (also the divisor for the mean)
ntrain = 700    # subjects used for fitting; the rest form the test set
sm = 0

# Loop-invariant data. Column 0 of the sex response is 1 where sex == 'M'.
xall = p[modidx]
yall = get_y(metadict, [task], subs)[0][:,0]

def toDict(w, acc, conf):
    """Package weights, the split's subject ids and a description for saving.

    Reads the current split from the globals ``idcs`` and ``ntrain``.
    Raises Exception if ``w`` is neither a torch tensor nor an ndarray.
    """
    if isinstance(w, torch.Tensor):
        w = w.detach().cpu().numpy()
    if not isinstance(w, np.ndarray):
        raise Exception('Not an ndarray!')
    dct = dict(w=w, 
               trsubs=sorted([subs[i] for i in idcs[:ntrain]]),
               tsubs=sorted([subs[i] for i in idcs[ntrain:]]),
               desc=f"Logistic regression FC {task} {mod} acc: {float(acc)} conf: {conf}")
    return dct

def save(dct, dr, idx):
    """Pickle ``dct`` to <base>/<dr>/<mod><idx>.pkl (the directory must exist)."""
    base = f"/home/anton/Documents/Tulane/Research/ImageNomer/data/anton/cohorts/test/weights"
    with open(f"{base}/{dr}/{mod}{idx}.pkl", 'wb') as f:
        pickle.dump(dct, f)

for ii in range(nsplits):
    # Fresh random permutation of subjects for this split.
    idcs = np.arange(xall.shape[0])
    np.random.shuffle(idcs)

    x = xall[idcs]
    xtr = x[:ntrain]
    xt = x[ntrain:]

    y = yall[idcs]
    ytr = y[:ntrain]
    yt = y[ntrain:]

    clf = LogisticRegression(max_iter=1000, penalty='l2', C=1, solver='lbfgs').fit(xtr, ytr)
    yhat = clf.predict(xt)
    acc = np.sum(yhat == yt)/len(yt)
    print(acc)

    # Row-normalised confusion matrix (rows: true label 0 then 1).
    mat = confusion_matrix(yt, yhat, normalize='true', labels=[0,1])
    print(mat)

    save(toDict(clf.coef_,acc,mat), f'{task}', ii)
    print(f'Done {ii}')
    sm += acc/nsplits
    
print('---')
print(sm)

0.7615384615384615
[[0.84285714 0.15714286]
 [0.33333333 0.66666667]]
Done 0
0.8076923076923077
[[0.86666667 0.13333333]
 [0.27272727 0.72727273]]
Done 1
0.7153846153846154
[[0.64864865 0.35135135]
 [0.19642857 0.80357143]]
Done 2
0.823076923076923
[[0.8115942  0.1884058 ]
 [0.16393443 0.83606557]]
Done 3
0.7538461538461538
[[0.86666667 0.13333333]
 [0.34285714 0.65714286]]
Done 4
0.7846153846153846
[[0.76       0.24      ]
 [0.18181818 0.81818182]]
Done 5
0.7384615384615385
[[0.72058824 0.27941176]
 [0.24193548 0.75806452]]
Done 6
0.7307692307692307
[[0.78125    0.21875   ]
 [0.31818182 0.68181818]]
Done 7
0.8
[[0.77464789 0.22535211]
 [0.16949153 0.83050847]]
Done 8
0.7307692307692307
[[0.70149254 0.29850746]
 [0.23809524 0.76190476]]
Done 9
0.8307692307692308
[[0.86075949 0.13924051]
 [0.21568627 0.78431373]]
Done 10
0.8
[[0.83333333 0.16666667]
 [0.234375   0.765625  ]]
Done 11
0.8
[[0.82432432 0.17567568]
 [0.23214286 0.76785714]]
Done 12
0.823076923076923
[[0.88       0.12      ]

In [48]:
# Summarize saved results: mean and standard deviation of the RMSE stored in each weight file's description

from pathlib import Path
import pickle
import re

# Directory holding the pickled per-split weight dictionaries to summarise.
base = f"/home/anton/Documents/Tulane/Research/ImageNomer/data/anton/cohorts/test/weights/partial/wrat_mean_zero"

# Raw string so that \d is a regex class rather than an invalid string escape.
# NOTE(review): this pattern only matches plain decimals, not 'nan' or
# scientific notation.
rmse_pattern = r'.*rmse: ([\d.]+).*'

vec = []
for path in Path(base).iterdir():
    # Only files whose name contains 'rest' are summarised.
    if 'rest' not in path.name:
        continue
    # NOTE(review): pickle.load executes arbitrary code embedded in the file;
    # only load pickles that come from a trusted source.
    with open(path, 'rb') as fh:
        dct = pickle.load(fh)
    mobj = re.match(rmse_pattern, dct['desc'])
    print(dct['desc'])
    if mobj is None:
        raise ValueError(f"No 'rmse: <number>' found in description of {path.name}")
    print(mobj.group(1))
    vec.append(float(mobj.group(1)))

# Average over the files actually found instead of assuming there are 20.
sm = float(np.mean(vec)) if vec else 0
print(sm)
print(np.std(np.array(vec)))

Least squares partial corr wrat rest rmse: 15.840668678283691
15.840668678283691
Least squares partial corr wrat rest rmse: 14.328940391540527
14.328940391540527
Least squares partial corr wrat rest rmse: 17.752723693847656
17.752723693847656
Least squares partial corr wrat rest rmse: 14.846572875976562
14.846572875976562
Least squares partial corr wrat rest rmse: 15.849759101867676
15.849759101867676
Least squares partial corr wrat rest rmse: 15.805267333984375
15.805267333984375
Least squares partial corr wrat rest rmse: 14.116043090820312
14.116043090820312
Least squares partial corr wrat rest rmse: 15.236705780029297
15.236705780029297
Least squares partial corr wrat rest rmse: 15.065845489501953
15.065845489501953
Least squares partial corr wrat rest rmse: 15.535000801086426
15.535000801086426
Least squares partial corr wrat rest rmse: 15.19347095489502
15.19347095489502
Least squares partial corr wrat rest rmse: 14.742619514465332
14.742619514465332
Least squares partial corr wra