In [93]:
from hyppo.ksample import KSample
from combat import combat
import pandas as pd
import glob
import os
import graspy as gp
import numpy as np
from scipy.stats import zscore, rankdata

In [200]:
def get_sub(fname):
    stext = os.path.basename(fname).split('_')
    return('{}_{}'.format(stext[1], stext[3]))

def get_sub_pheno_dat(subid, scan, pheno_dat):
    matches = pheno_dat.index[pheno_dat["SUBID"] == int(subid)].tolist()
    match = np.min(matches)
    return(int(pheno_dat.iloc[match]["SEX"]))

def apply_along_dataset(scs, dsets, fn):
    scs_xfmd = np.zeros(scs.shape) 
    for dset in np.unique(dsets):
        scs_xfmd[dsets == dset,:] = np.apply_along_axis(fn, 0, scs[dsets == dset,:])
    return(scs_xfmd)

def ptr(x):
    x_ch = copy.deepcopy(x)
    nz = x[x != 0]
    x_rank = rankdata(nz)*2/(len(nz) + 1)
    x_ch[x != 0] = x_rank
    x_ch = (x_ch - np.min(x_ch))/(np.max(x_ch) - np.min(x_ch))
    return(x_ch)

In [201]:
fmri_dict = {}
pheno_dat = {}

for dataset in ["BNU1", "HNU1"]:
    pheno_dat[dataset] = pd.read_csv('/data/corr/phenotypic/{}_phenotypic_data.csv'.format(dataset))
    scan_dict = {}
    sex_dict = []
    dset_dir = os.path.join('/data/corr/{}/FSL_nff_nsc_gsr_des'.format(dataset), '*.ssv')
    for f in glob.glob(dset_dir):
        gr_dat = gp.utils.import_edgelist(f)
        sub = get_sub(f)
        scan_dict[sub] = gr_dat.flatten()
        scansub = sub.split('_')
        sex_dict.append(get_sub_pheno_dat(scansub[0], scansub[1], pheno_dat[dataset]))
    fmri_dict[dataset] = {}
    fmri_dict[dataset]["scans"] = np.vstack(list(scan_dict.values()))
    fmri_dict[dataset]["subs"] = list(scan_dict.keys())
    fmri_dict[dataset]["sex"] = sex_dict

In [202]:
scans = np.vstack((fmri_dict["BNU1"]["scans"], fmri_dict["HNU1"]["scans"]))
scans = scans[:,~np.all(scans == 0, axis=0)]
scans.flags.writeable = False
sex = np.array(fmri_dict["BNU1"]["sex"] + fmri_dict["HNU1"]["sex"])
datasets = np.array([1 for i in range(0, fmri_dict["BNU1"]["scans"].shape[0])] + 
                    [2 for i in range(0, fmri_dict["HNU1"]["scans"].shape[0])])

# Raw Graphs

## Uncorrected

In [203]:
eff_batch = KSample("DCorr").test(scans[datasets == 1,:], scans[datasets == 2,:])

In [204]:
eff_sex = KSample("DCorr").test(scans[sex == 1,:], scans[sex == 2,:])

In [205]:
eff_sex

(0.08556028936106726, 2.93861984941599e-09)

In [206]:
eff_batch

(0.26217454597493495, 7.874182809594757e-25)

## Combatted

In [207]:
combat_scans = np.array(combat(pd.DataFrame(scans.T), datasets, model=None, numerical_covariates=None)).T

found 2 batches
found 0 numerical covariates...
found 0 categorical variables:	
Standardizing Data across genes.
Fitting L/S model and finding priors
Finding parametric adjustments


Adjusting data


In [208]:
eff_batch = KSample("DCorr").test(combat_scans[datasets == 1,:], combat_scans[datasets == 2,:])

In [209]:
eff_sex = KSample("DCorr").test(combat_scans[sex == 1,:], combat_scans[sex == 2,:])

In [210]:
eff_batch

(-0.013280257169799038, 1.0)

In [211]:
eff_sex

(0.09274982834747669, 6.721249660559353e-10)

## Z-Scored

In [212]:
zsc_scans = apply_along_dataset(scans, datasets, zscore)

In [213]:
eff_batch = KSample("DCorr").test(zsc_scans[datasets == 1,:], zsc_scans[datasets == 2,:])

In [214]:
eff_sex = KSample("DCorr").test(zsc_scans[sex == 1,:], zsc_scans[sex == 2,:])

In [215]:
eff_batch

(-0.027611235151630754, 1.0)

In [216]:
eff_sex

(0.08703107129080727, 2.1725752648207593e-09)

## PTR

In [217]:
scans.max()

0.962954

In [218]:
ptr_scans = apply_along_dataset(scans, datasets, ptr)

In [219]:
eff_batch = KSample("DCorr").test(ptr_scans[datasets == 1,:], ptr_scans[datasets == 2,:])

In [220]:
eff_sex = KSample("DCorr").test(ptr_scans[sex == 1,:], ptr_scans[sex == 2,:])

In [221]:
eff_batch

(-0.03235760957498605, 1.0)

In [222]:
eff_sex

(0.08914316929579671, 1.4083298279827456e-09)

# PTR'd Graphs

## Uncorrected

In [223]:
ptr_scans = np.apply_along_axis(ptr, 1, scans)

In [224]:
eff_batch = KSample("DCorr").test(ptr_scans[datasets == 1,:], ptr_scans[datasets == 2,:])

In [225]:
eff_sex = KSample("DCorr").test(ptr_scans[sex == 1,:], ptr_scans[sex == 2,:])

In [226]:
eff_sex

(0.08043148374437958, 8.433078526608922e-09)

In [227]:
eff_batch

(0.2482625481206262, 1.306452911145757e-23)

## Combatted

In [228]:
combat_scans = np.array(combat(pd.DataFrame(ptr_scans.T), datasets, model=None, numerical_covariates=None)).T

found 2 batches
found 0 numerical covariates...
found 0 categorical variables:	
Standardizing Data across genes.
Fitting L/S model and finding priors
Finding parametric adjustments


Adjusting data


In [229]:
eff_batch = KSample("DCorr").test(combat_scans[datasets == 1,:], combat_scans[datasets == 2,:])

In [230]:
eff_sex = KSample("DCorr").test(combat_scans[sex == 1,:], combat_scans[sex == 2,:])

In [231]:
eff_batch

(-0.024406685867680224, 1.0)

In [232]:
eff_sex

(0.08592079984903656, 2.72889630867019e-09)

## Z-Scored

In [233]:
zsc_scans = apply_along_dataset(ptr_scans, datasets, zscore)

In [234]:
eff_batch = KSample("DCorr").test(zsc_scans[datasets == 1,:], zsc_scans[datasets == 2,:])

In [235]:
eff_sex = KSample("DCorr").test(zsc_scans[sex == 1,:], zsc_scans[sex == 2,:])

In [236]:
eff_batch

(-0.03656364831605591, 1.0)

In [237]:
eff_sex

(0.08691814842951945, 2.2235336819468597e-09)

## PTR

In [238]:
ptrptr_scans = apply_along_dataset(ptr_scans, datasets, zscore)

In [57]:
eff_batch = KSample("DCorr").test(ptrptr_scans[datasets == 1,:], ptrptr_scans[datasets == 2,:])

In [58]:
eff_sex = KSample("DCorr").test(ptrptr_scans[sex == 1,:], ptrptr_scans[sex == 2,:])

In [59]:
eff_batch

(0.24734251950415923, 1.5732197555338792e-23)

In [60]:
eff_sex

(0.08160897482923697, 6.619264566333888e-09)