In [29]:
import networkx as nx
import numpy as np
import pandas as pd
import glob
import matplotlib.pyplot as plt
import os
from scipy.stats import ttest_ind, f_oneway

In [4]:
# all functions
# example output/no_split/1_al_ciplv_theta_1.npy
def parse_filename(filename):
    #get file name
    filename = os.path.basename(filename)
    s = filename.split("_")
    pId = s[0]
    label = s[1]
    method = s[2]
    freq = s[-1].split(".")[0]
    # epoch = s[-1].split(".")[0]
    
    # return {"pId": pId, "label": label, "method": method, "freq": freq, "epoch": epoch}
    return {"pId": pId, "label": label, "method": method, "freq": freq}

def read_file(filename):
    return np.load(filename)

# get all the files in the folder output/
def get_files(folder, filter = None):
    '''
    filter is a dictionary, has 2 keys: method and freq to filter the files
    '''
    files = glob.glob(os.path.join(folder, "*.npy"))
    ret_files = []
    if filter:
        for f in files:
            # get the filename
            filename = os.path.basename(f)
            f_info = parse_filename(filename)
            # remove epoch 2 if no overlap
            # if f_info["epoch"] == "2":
            #     continue
            if (f_info["method"] == filter["method"]) and (f_info["freq"] == filter["freq"]):
                ret_files.append(f)
    else:
        ret_files = files

    return ret_files

def lower_to_links(data):
    """
    data: lower part of diagonal matrix
    """
    li = []
    lj = []
    lv = []
    for i in range(data.shape[0]):
        for j in range(data.shape[1]):
            if i > j:
                li.append(i)
                lj.append(j)
                lv.append(data[i][j])

    links = pd.DataFrame({"source": li, "target": lj, "weight": lv})

    return links

# def feature_extraction(data):
#     """
#     get lower part of diagonal matrix
#     """
#     data_lower = data[np.tril_indices(data.shape[0], k=-1)]
#     return data_lower

In [126]:
def global_efficiency(G):
    n = len(G)
    denom = n * (n - 1)
    if denom != 0:
        lengths = nx.all_pairs_dijkstra_path(G)
        g_eff = 0
        for source, targets in lengths:
            for target, paths in targets.items():
                # print(source, target)
                d = 0
                if len(paths) > 1:
                    for i in range(len(paths) - 1):
                        w = G.get_edge_data(paths[i], paths[i+1])["weight"]
                        d += w/(i+1)

                    # d = d / (len(paths) - 1) # average over path length
                    g_eff += 1 / d
        g_eff /= denom
        
    else:
        g_eff = 0

    return g_eff
    
def local_efficiency(G):
    efficiency_list = (global_efficiency(G.subgraph(G[v])) for v in G)
    return sum(efficiency_list) / len(G)

In [None]:
from itertools import permutations

def global_efficiency_weighted(G):
    n = len(G)
    denom = n * (n - 1)
    if denom != 0:
        shortest_paths = nx.all_pairs_dijkstra(G, weight = 'weight')
        g_eff = sum(1./shortest_paths[u][0][v] if shortest_paths[u][0][v] !=0 else 0 for u, v in permutations(G, 2)) / denom
    else:
        g_eff = 0
    return g_eff

def local_efficiency_weighted(G):
    efficiency_list = (global_efficiency_weighted(G.subgraph(G[v])) for v in G)
    return sum(efficiency_list) / len(G)

In [127]:
data_source = "no_split"
labels = {"al": 0, "fa":1}

methods = ['pli']
freqs = ['delta', 'theta', 'alpha', 'beta', 'gamma']

In [128]:
l_method =[]
l_pid = []
l_label = []
l_local_eff = []
l_global_eff = []
l_freq = []
for freq in freqs:
    filter = {"method": methods[0], "freq": freq}
    files = get_files("output/no_split/", filter)
    print("method: {}, freq: {}, files: {}".format(methods[0], freq, len(files)))
    print("progress:")
    for i, f in enumerate(files):
        print(i, end=",")
        metadata = parse_filename(f)
        data = read_file(f)
        links = lower_to_links(data)
        # get 30% largest weights
        links = links.sort_values(by="weight", ascending=False)
        links_filtered = links.iloc[:int(len(links)*0.05)]
        
        G=nx.from_pandas_edgelist(links_filtered, 'source', 'target', edge_attr=['weight'])
        # local_eff = nx.algorithms.local_efficiency(G)
        # global_eff = nx.algorithms.global_efficiency(G)
        local_eff = local_efficiency(G)
        global_eff = global_efficiency(G)

        l_method.append(methods[0])
        l_pid.append(metadata["pId"])
        l_label.append(labels[metadata["label"]])
        l_freq.append(metadata["freq"])
        l_local_eff.append(local_eff)
        l_global_eff.append(global_eff)

method: pli, freq: delta, files: 96
progress:
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,method: pli, freq: theta, files: 96
progress:
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,method: pli, freq: alpha, files: 96
progress:
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,method: pli, freq: beta, fil

In [132]:
df_res = pd.DataFrame({"method": l_method, 
                        "pid": l_pid, 
                        "label": l_label, 
                        "freq": l_freq, 
                        "local_eff": l_local_eff, 
                        "global_eff": l_global_eff})
df_res.head()

Unnamed: 0,method,pid,label,freq,local_eff,global_eff
0,pli,10,0,delta,0.150087,1.099452
1,pli,10,1,delta,0.057781,1.092599
2,pli,11,0,delta,0.129768,1.110913
3,pli,11,1,delta,0.180557,1.128173
4,pli,12,0,delta,0.222099,1.149568


# t test

In [133]:
def t_test(df, freq, feature = "local_eff"):
    df_freq = df[df["freq"] == freq]
    df1 = df_freq[df_freq["label"] == 0]
    df2 = df_freq[df_freq["label"] == 1]
    t, p = ttest_ind(df1[feature], df2[feature])
    return t, p

In [134]:
for freq in freqs:
    print("------freq: {} --------------".format(freq))
    test_res_local_eff = t_test(df_res, freq, "local_eff")
    test_res_global_eff = t_test(df_res, freq, "global_eff")
    print("local_eff: t={}, p={}".format(test_res_local_eff[0], test_res_local_eff[1]))
    print("global_eff: t={}, p={}".format(test_res_global_eff[0], test_res_global_eff[1]))


------freq: delta --------------
local_eff: t=-0.18182015270957702, p=0.8561153773176442
global_eff: t=-0.6511984897089096, p=0.5165071789028101
------freq: theta --------------
local_eff: t=-1.569106358384315, p=0.11998340794158524
global_eff: t=-0.7635286709941649, p=0.44706030575711553
------freq: alpha --------------
local_eff: t=-0.1901072774305687, p=0.8496350085336779
global_eff: t=-0.2987217204261977, p=0.76581176516413
------freq: beta --------------
local_eff: t=-2.6784278666469534, p=0.00873145297329737
global_eff: t=-0.3671277370009883, p=0.7143489467118126
------freq: gamma --------------
local_eff: t=-1.74743539432364, p=0.0838275550336266
global_eff: t=-1.024730982469862, p=0.3081200091818404


In [135]:

df_freq = df_res[df_res["freq"] == "delta"]
df1 = df_freq[df_freq["label"] == 0]
df2 = df_freq[df_freq["label"] == 1]

In [137]:
df1['local_eff'].mean(), df2['local_eff'].mean()
df1['global_eff'].mean(), df2['global_eff'].mean()


(0.2538223966184485, 0.26170702249193617)