In [1]:
import pandas as pd
import numpy as np
import os.path as op
from scipy.stats import zscore, ks_2samp
import os
import itertools

import matplotlib.pyplot as plt

%matplotlib inline

# from varmodel_utils import objective, get_variance_indices

In [2]:
bp = '/data/RocklandSample/derivatives/paper1/'
exp = 'connectomes_agebmi'
bpp = op.join(bp, exp)
try:
    os.makedirs(bpp)
except FileExistsError:
    pass

# Load DF and grab graphs
df = pd.read_hdf(bpp + '.h5')
df.drop(columns=["session", "directions", "seed"], inplace=True)
df = df.reset_index(level=0)

# Load stats DF and grab graphs
df_s = pd.read_hdf(bpp + '_stats.h5')

df = df.merge(df_s, how='outer', on='index', validate='1:1')

# Reorganize DFs with heirarchical indexing
cols = ["subject", "pipeline"]
df = df.set_index(cols)

In [3]:
uniq_ids = list(set(df.index))

In [4]:
uni_stats = ['modularity', 'assort', 'avplength', 'edgecount', 'globaleffic']
multi_stats = ['degree', 'weight', 'ccoeff', 'betweenness', 'plength']

### Univariate

In [5]:
z_crit = 1.645

In [6]:
ids = uniq_ids
zscore_dict = {}
for unistat in uni_stats:
    zstat = []
    for idd in ids:
        tdat = df.loc[idd][unistat].values
        z = np.nan_to_num(zscore(tdat, ddof=0))
        zstat += [z]

    zscore_dict[unistat] = zstat

  
  return (a - mns) / sstd


In [9]:
print("Percent of MCA sims resulting in significant (p < 0.05) difference:")
for key in zscore_dict:
    print(key, end=':  \t')
    tmp = np.reshape(np.stack(zscore_dict[key]), (-1))
    perc_sig = np.sum((np.abs(tmp) > z_crit).astype(int))/len(tmp)*100
    print("%02.3f" % perc_sig)

Percent of MCA sims resulting in significant (p < 0.05) difference:
modularity:  	10.005
assort:  	1.780
avplength:  	1.708
edgecount:  	1.852
globaleffic:  	1.732


### Multivariate

In [8]:
ids = uniq_ids
ksscore_dict = {}
ksp_dict = {}
for multistat in multi_stats:
    ksstat = []
    ksp = []
    for idd in ids:
        tdat = df.loc[idd][multistat].values
        tmp_ks = []
        tmp_p = []
        for i, j in itertools.combinations(range(len(tdat)), 2):
            tmp = ks_2samp(tdat[i], tdat[j])
            tmp_ks += [tmp[0]]
            tmp_p += [tmp[1]]

        ksstat += [np.mean(tmp_ks)]
        ksp += [np.mean(tmp_p)]
    ksscore_dict[multistat] = ksstat
    ksp_dict[multistat] = ksp

  


In [10]:
print("Percent of MCA sims resulting in significant (p < 0.05) difference:")
for key in ksp_dict:
    print(key, end=':  \t')
    tmp = np.reshape(np.stack(ksp_dict[key]), (-1))
    perc_sig = np.sum((np.abs(tmp) < 0.05).astype(int))/len(tmp)*100
    print("%02.3f" % perc_sig)

Percent of MCA sims resulting in significant (p < 0.05) difference:
degree:  	0.000
weight:  	0.000
ccoeff:  	0.000
betweenness:  	0.000
plength:  	0.000
