In [32]:
import pandas as pd
import numpy as np
import os.path as op
from scipy.stats import zscore, ks_2samp
import os
import itertools

import matplotlib.pyplot as plt

%matplotlib inline

# from varmodel_utils import objective, get_variance_indices

In [9]:
# Base derivatives directory and experiment name. `bpp` is used both as a
# directory to create and as the filename prefix of the HDF5 inputs below.
bp = '/data/RocklandSample/derivatives/paper1/'
exp = 'connectomes_agebmi'
bpp = op.join(bp, exp)
os.makedirs(bpp, exist_ok=True)  # no-op when the directory already exists

# Load the graph DF, drop the session/directions/seed columns, and move index
# level 0 into a regular column (presumably the 'index' column merged on
# below -- TODO confirm against the stored file).
df = (
    pd.read_hdf(bpp + '.h5')
    .drop(columns=["session", "directions", "seed"])
    .reset_index(level=0)
)

# Load the companion stats DF
df_s = pd.read_hdf(bpp + '_stats.h5')

# Outer-join the two frames on 'index'; validate='1:1' makes pandas raise if
# the key is duplicated on either side.
df = df.merge(df_s, how='outer', on='index', validate='1:1')

# Reorganize the merged DF with hierarchical (subject, pipeline) indexing
cols = ["subject", "pipeline"]
df = df.set_index(cols)

In [10]:
# Unique index keys of `df`, in order of first appearance.
# Index.unique() is used instead of list(set(...)) because set iteration order
# over keys containing strings can differ between kernel sessions (hash
# randomization), which would make every per-id result list built from
# `uniq_ids` come out in an irreproducible order.
uniq_ids = df.index.unique().tolist()

In [11]:
# Column names handled by the univariate (z-score) analysis.
uni_stats = [
    'modularity',
    'assort',
    'avplength',
    'edgecount',
    'globaleffic',
]

# Column names handled by the multivariate (pairwise KS test) analysis.
multi_stats = [
    'degree',
    'weight',
    'ccoeff',
    'betweenness',
    'plength',
]

### Univariate

In [12]:
# Critical z-score threshold.
# NOTE(review): 1.645 looks like the one-sided 5% standard-normal quantile and
# is presumably meant to be compared against |z| -- confirm the intended test.
z_crit = 1.645

In [22]:
# Univariate z-scores: for each statistic, standardize the values belonging to
# each id in `uniq_ids` and collect one z-score array per id.
ids = uniq_ids
zscore_dict = {}
for unistat in uni_stats:
    # ddof=0 -> population standard deviation. nan_to_num replaces any NaN in
    # the zscore output with 0 (and +/-inf with large finite values).
    zscore_dict[unistat] = [
        np.nan_to_num(zscore(df.loc[idd, unistat].values, ddof=0))
        for idd in ids
    ]

  


### Multivariate

In [48]:
# Multivariate comparison: for each statistic and each id in `uniq_ids`, run a
# two-sample KS test on every unordered pair of that id's entries, then store
# the mean KS statistic and the mean p-value over all pairs.
ids = uniq_ids
ksscore_dict, ksp_dict = {}, {}
for multistat in multi_stats:
    ksstat, ksp = [], []
    for idd in ids:
        tdat = df.loc[idd, multistat].values
        # One (statistic, p-value) result per pair, in positional pair order.
        pair_results = [
            ks_2samp(first, second)
            for first, second in itertools.combinations(tdat, 2)
        ]
        ksstat.append(np.mean([res[0] for res in pair_results]))
        ksp.append(np.mean([res[1] for res in pair_results]))
    ksscore_dict[multistat] = ksstat
    ksp_dict[multistat] = ksp

  


In [50]:
# Display the per-id mean pairwise KS p-values for each multivariate statistic.
ksp_dict.values()

dict_values([[1.0, 1.0, 0.999999999982476, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999993963158986, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.999999698107568, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999514341392368, 1.0, 1.0, 1.0, 1.0, 0.9994351257838763, 0.9999616537517115, 1.0, 1.0, 1.0, 1.0, 1.0, 0.99999106315874, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999999999627616, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999448364167018, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999999245115584, 1.0, 1.0, 1.0, 1.0, 1.0, 0.999842610440682, 0.9999999999474283, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9997732705291501, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999996227111272, 1.0, 1.0, 0.9955663007680889, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999