In [35]:
from hyppo.ksample import KSample
from combat import combat
import pandas as pd
import glob
import os
import graspy as gp
import numpy as np
from scipy.stats import zscore, rankdata

In [2]:
def get_sub(fname):
    stext = os.path.basename(fname).split('_')
    return('{}_{}'.format(stext[1], stext[3]))

def get_sub_pheno_dat(subid, scan, pheno_dat):
    matches = pheno_dat.index[pheno_dat["SUBID"] == int(subid)].tolist()
    match = np.min(matches)
    return(int(pheno_dat.iloc[match]["SEX"]))

In [3]:

fmri_dict = {}
pheno_dat = {}

for dataset in ["BNU1", "HNU1"]:
    pheno_dat[dataset] = pd.read_csv('/data/corr/phenotypic/{}_phenotypic_data.csv'.format(dataset))
    scan_dict = {}
    sex_dict = []
    dset_dir = os.path.join('/data/corr/{}/FSL_nff_nsc_gsr_des'.format(dataset), '*.ssv')
    for f in glob.glob(dset_dir):
        gr_dat = gp.utils.import_edgelist(f)
        sub = get_sub(f)
        scan_dict[sub] = gr_dat.flatten()
        scansub = sub.split('_')
        sex_dict.append(get_sub_pheno_dat(scansub[0], scansub[1], pheno_dat[dataset]))
    fmri_dict[dataset] = {}
    fmri_dict[dataset]["scans"] = np.vstack(list(scan_dict.values()))
    fmri_dict[dataset]["subs"] = list(scan_dict.keys())
    fmri_dict[dataset]["sex"] = sex_dict

In [94]:
scans = np.vstack((fmri_dict["BNU1"]["scans"], fmri_dict["HNU1"]["scans"]))
scans = scans[:,~np.all(scans == 0, axis=0)]
sex = np.array(fmri_dict["BNU1"]["sex"] + fmri_dict["HNU1"]["sex"])
datasets = np.array([1 for i in range(0, fmri_dict["BNU1"]["scans"].shape[0])] + [2 for i in range(0, fmri_dict["HNU1"]["scans"].shape[0])])

# Raw Graphs

## Uncorrected

In [95]:
eff_batch = KSample("DCorr").test(scans[datasets == 1,:], scans[datasets == 2,:])

In [96]:
eff_sex = KSample("DCorr").test(scans[sex == 1,:], scans[sex == 2,:])

In [97]:
eff_sex

(0.08556028936106726, 2.93861984941599e-09)

In [98]:
eff_batch

(0.26217454597493495, 7.874182809594757e-25)

## Combatted

In [99]:
combat_scans = np.array(combat(pd.DataFrame(scans.T), datasets, model=None, numerical_covariates=None)).T

found 2 batches
found 0 numerical covariates...
found 0 categorical variables:	
Standardizing Data across genes.
Fitting L/S model and finding priors
Finding parametric adjustments


Adjusting data


In [100]:
eff_batch = KSample("DCorr").test(combat_scans[datasets == 1,:], combat_scans[datasets == 2,:])

In [101]:
eff_sex = KSample("DCorr").test(combat_scans[sex == 1,:], combat_scans[sex == 2,:])

In [102]:
eff_batch

(-0.013280257169799038, 1.0)

In [103]:
eff_sex

(0.09274982834747669, 6.721249660559353e-10)

## Z-Scored

In [104]:
zsc_scans = zscore(scans, axis=0)

In [105]:
eff_batch = KSample("DCorr").test(zsc_scans[datasets == 1,:], zsc_scans[datasets == 2,:])

In [106]:
eff_sex = KSample("DCorr").test(zsc_scans[sex == 1,:], zsc_scans[sex == 2,:])

In [107]:
eff_batch

(0.23672232435854018, 1.3443006340173307e-22)

In [108]:
eff_sex

(0.08103100277053915, 7.45471024575113e-09)

## PTR

In [109]:
def ptr(x):
    nz = x[x != 0]
    x_rank = rankdata(x)*2/(len(nz) + 1)
    x[x != 0] = x_rank
    x = (x - np.min(x))/(np.max(x) - np.min(x))
    return(x)

In [110]:
ptr_scans = np.apply_along_axis(ptr, 0, scans)

In [111]:
eff_batch = KSample("DCorr").test(ptr_scans[datasets == 1,:], ptr_scans[datasets == 2,:])

In [112]:
eff_sex = KSample("DCorr").test(ptr_scans[sex == 1,:], ptr_scans[sex == 2,:])

In [113]:
eff_batch

(0.2473425195120012, 1.5732197530422215e-23)

In [114]:
eff_sex

(0.08160897491848358, 6.619264444855824e-09)

# PTR'd Graphs

## Uncorrected

In [115]:
ptr_scans = np.apply_along_axis(ptr, 1, scans)

In [116]:
eff_batch = KSample("DCorr").test(ptr_scans[datasets == 1,:], ptr_scans[datasets == 2,:])

In [117]:
eff_sex = KSample("DCorr").test(ptr_scans[sex == 1,:], ptr_scans[sex == 2,:])

In [118]:
eff_sex

(0.08423104363722812, 3.861326468299396e-09)

In [119]:
eff_batch

(0.25677518730257104, 2.3419726791230012e-24)

## Combatted

In [120]:
combat_scans = np.array(combat(pd.DataFrame(ptr_scans.T), datasets, model=None, numerical_covariates=None)).T

found 2 batches
found 0 numerical covariates...
found 0 categorical variables:	
Standardizing Data across genes.
Fitting L/S model and finding priors
Finding parametric adjustments


Adjusting data


In [121]:
eff_batch = KSample("DCorr").test(combat_scans[datasets == 1,:], combat_scans[datasets == 2,:])

In [122]:
eff_sex = KSample("DCorr").test(combat_scans[sex == 1,:], combat_scans[sex == 2,:])

In [123]:
eff_batch

(-0.025021197946169783, 1.0)

In [124]:
eff_sex

(0.08969986189036905, 1.2563162045631572e-09)

## Z-Scored

In [125]:
zsc_scans = zscore(ptr_scans, axis=0)

In [126]:
eff_batch = KSample("DCorr").test(zsc_scans[datasets == 1,:], zsc_scans[datasets == 2,:])

In [127]:
eff_sex = KSample("DCorr").test(zsc_scans[sex == 1,:], zsc_scans[sex == 2,:])

In [128]:
eff_batch

(0.2573073653329087, 2.1033927297823563e-24)

In [91]:
eff_sex

(0.08052881966343937, 8.265904434143783e-09)

## PTR

In [92]:
def ptr(x):
    nz = x[x != 0]
    x_rank = rankdata(x)*2/(len(nz) + 1)
    x[x != 0] = x_rank
    x = (x - np.min(x))/(np.max(x) - np.min(x))
    return(x)

In [93]:
ptrptr_scans = np.apply_along_axis(ptr, 0, ptr_scans)

ValueError: NumPy boolean array indexing assignment cannot assign 400 input values to the 399 output values where the mask is true

In [57]:
eff_batch = KSample("DCorr").test(ptrptr_scans[datasets == 1,:], ptrptr_scans[datasets == 2,:])

In [58]:
eff_sex = KSample("DCorr").test(ptrptr_scans[sex == 1,:], ptrptr_scans[sex == 2,:])

In [59]:
eff_batch

(0.24734251950415923, 1.5732197555338792e-23)

In [60]:
eff_sex

(0.08160897482923697, 6.619264566333888e-09)