In [30]:
import networkx as nx
import numpy as np
import pandas as pd
import glob
import matplotlib.pyplot as plt
import os
import mne
from scipy.stats import ttest_ind, f_oneway, ttest_rel

In [31]:
# Lobe -> {cortical region name -> short abbreviation}.
# get_lobe() only tests membership of a region name in each inner dict; the
# abbreviation values are not read by any cell visible in this notebook.
# NOTE(review): region names presumably match the 'aparc' label names loaded
# further down (without the "-lh"/"-rh" suffix) -- confirm against label_names.
lobe_map = {"Frontal":{
                "frontalpole":"FP",
                "medialorbitofrontal":"MO",
                "lateralorbitofrontal":"LO",
                "paracentral":"ParaC",
                "parsopercularis":"POp",
                "parsorbitalis":"POr",
                "parstriangularis":"PT",
                "precentral": "PreC",
                "rostralanteriorcingulate":"RAC",
                "rostralmiddlefrontal":"RMF",
                "superiorfrontal":"SF",
                "caudalanteriorcingulate":"CACC",
                "caudalmiddlefrontal":"CMF",
            },
            "Parietal":{
                "superiorparietal":"SP",
                "inferiorparietal":"IP",
                "isthmuscingulate":"IC",
                "postcentral":"PG",
                "posteriorcingulate":"PCC",
                "precuneus":"PCUN",
                "supramarginal":"SG",
            },
            "Temporal":{
                "bankssts":"BSTS",
                "entorhinal":"ENT",
                "fusiform":"FUS",
                "middletemporal":"MT",
                "parahippocampal":"PHG",
                "superiortemporal": "STG",
                "temporalpole":"TP",
                "transversetemporal":"TTG",
                "inferiortemporal":"ITG",
            },
            "Occipital":{
                "cuneus":"CUN",
                "lateraloccipital":"LOC",
                "lingual":"LNG",
                "pericalcarine":"PCAL",
            },
            "Insula":{
                "insula":"INS"
            }
}
# One 3-tuple per lobe key of lobe_map.
# NOTE(review): presumably RGB colours in the 0-1 range for plotting; this
# mapping is not used by any cell visible in this notebook -- confirm.
lobe_map_color = {"Frontal": (1,0,0),
                  "Insula": (0,1,0),
                "Temporal": (0,0,1),
                "Parietal": (1, 1,0), 
                 "Occipital": (0,1,1),
}

In [32]:
# all functions
# example output/no_split/1_al_ciplv_theta_1.npy
def parse_filename(filename):
    """
    Split a result file name into its metadata fields.

    The base name is expected to look like ``<pId>_<label>_<method>_<freq>.npy``
    or, with a trailing epoch index, ``<pId>_<label>_<method>_<freq>_<epoch>.npy``.

    filename: path or bare file name; only the base name is parsed.

    Returns a dict with the string fields "pId", "label", "method" and "freq".
    Raises IndexError when the name has fewer than three "_"-separated fields.
    """
    fields = os.path.basename(filename).split("_")

    # The frequency band is the 4th field. Reading the *last* field instead
    # returned the epoch index ("1") for names that carry an epoch suffix.
    # Shorter names keep the previous behaviour of using the last field.
    freq_field = fields[3] if len(fields) > 3 else fields[-1]

    return {
        "pId": fields[0],
        "label": fields[1],
        "method": fields[2],
        # drop the file extension when the band is the final field
        "freq": freq_field.split(".")[0],
    }

def read_file(filename):
    """Load and return the contents of the NumPy file at ``filename`` via ``np.load``."""
    loaded = np.load(filename)
    return loaded

# get all the files in the folder output/
def get_files(folder, filter = None):
    '''
    List the ``*.npy`` files directly inside ``folder``, in sorted order.

    filter is an optional dictionary with 2 keys, "method" and "freq"; when
    given, only files whose parsed name matches both values are returned.
    When omitted (or empty) every ``.npy`` file is returned.

    Returns a list of paths (``folder`` joined with each file name).
    '''
    # glob returns entries in arbitrary, OS-dependent order. Sorting makes the
    # result reproducible, which matters because downstream code relies on the
    # row order produced from this list.
    files = sorted(glob.glob(os.path.join(folder, "*.npy")))
    if not filter:
        return files

    ret_files = []
    for path in files:
        f_info = parse_filename(os.path.basename(path))
        if (f_info["method"] == filter["method"]) and (f_info["freq"] == filter["freq"]):
            ret_files.append(path)
    return ret_files

def lower_to_links(data):
    """
    Turn the strictly-lower triangle of a 2-D matrix into an edge list.

    data: 2-D array-like exposing ``.shape`` and ``data[i][j]`` indexing.

    Returns a DataFrame with columns "source" (row index), "target"
    (column index) and "weight" (``data[source][target]``), one row per cell
    with row index > column index, in row-major order.
    """
    below_diagonal = [
        (row, col)
        for row in range(data.shape[0])
        for col in range(data.shape[1])
        if col < row
    ]

    return pd.DataFrame({
        "source": [row for row, _ in below_diagonal],
        "target": [col for _, col in below_diagonal],
        "weight": [data[row][col] for row, col in below_diagonal],
    })

def conn_to_links(data):
    """
    Turn every cell of a 2-D matrix into an edge list.

    data: 2-D array-like exposing ``.shape`` and ``data[i][j]`` indexing.
    Unlike lower_to_links, the diagonal and both triangles are included.

    Returns a DataFrame with columns "source" (row index), "target"
    (column index) and "weight" (``data[source][target]``) in row-major order.
    """
    cells = [
        (row, col)
        for row in range(data.shape[0])
        for col in range(data.shape[1])
    ]

    return pd.DataFrame({
        "source": [row for row, _ in cells],
        "target": [col for _, col in cells],
        "weight": [data[row][col] for row, col in cells],
    })

def get_lobe(name):
    """
    Map a label name of the form "<region>-<hemisphere>" to "<Lobe>-<hemisphere>".

    The region is looked up in the module-level ``lobe_map``; the first lobe
    whose region table contains it wins. Returns None when no lobe lists the
    region. Raises IndexError when ``name`` contains no "-".
    """
    pieces = name.split("-")
    region, hemi = pieces[0], pieces[1]

    owning_lobe = next(
        (lobe for lobe, regions in lobe_map.items() if region in regions),
        None,
    )
    if owning_lobe is None:
        return None
    return owning_lobe + "-" + hemi

def feature_extraction_lobe_level(data):
    """
    Average region-to-region values into lobe-to-lobe values.

    data: DataFrame with columns "source" and "target" (label names accepted
          by get_lobe) and a numeric "value" column. It is not modified.

    Returns a DataFrame with columns "source_lobe", "target_lobe" and "value",
    one row for every combination of the lobes that occur as a source, sorted
    by source then target. Combinations with no data get the value 0. Rows
    whose source or target maps to no lobe (get_lobe returns None) are dropped
    by the groupby.
    """
    # assign() returns a new frame, so the caller's DataFrame does not
    # silently gain the two lobe columns.
    with_lobes = data.assign(
        source_lobe=data["source"].apply(get_lobe),
        target_lobe=data["target"].apply(get_lobe),
    )

    # Average only the numeric "value" column: a frame-wide mean() would also
    # hit the string "source"/"target" columns, which raises on pandas >= 2.0.
    lobe_means = (
        with_lobes.groupby(["source_lobe", "target_lobe"])["value"]
        .mean()
        .reset_index()
    )

    # Square lobe x lobe matrix, zero where a pair has no data.
    unique_lobe = lobe_means["source_lobe"].unique()
    corr_matrix = pd.DataFrame(
        np.zeros((len(unique_lobe), len(unique_lobe))),
        columns=unique_lobe,
        index=unique_lobe,
    )
    for row in lobe_means.itertuples(index=False):
        corr_matrix.at[row.source_lobe, row.target_lobe] = row.value

    # Back to long format: source_lobe, target_lobe, value.
    corr_df = corr_matrix.stack().reset_index()
    corr_df = corr_df.rename(columns={"level_0": "source_lobe", "level_1": "target_lobe", 0: "value"})
    corr_df = corr_df.sort_values(by=["source_lobe", "target_lobe"])

    return corr_df

In [44]:
def global_efficiency(G):
    """
    Custom global-efficiency score of graph ``G`` using the "value" edge attribute.

    For every ordered node pair connected by a path, a path cost
    ``d = sum(value_k / (k + 1))`` is accumulated over the edges of the path
    (k = 0 for the first edge, so later edges are down-weighted), and ``1 / d``
    is added to the score. The total is divided by ``n * (n - 1)``.

    G: networkx graph whose edges carry a "value" attribute.

    Returns 0 for graphs with fewer than two nodes. Pairs whose path cost is
    exactly 0 contribute 0 instead of raising ZeroDivisionError, matching
    how global_efficiency_weighted treats zero distances.

    NOTE(review): the paths come from nx.all_pairs_dijkstra_path with its
    default weight key ("weight"); the edges here only carry "value", so the
    routes are presumably hop-count shortest paths -- confirm this is intended.
    """
    n = len(G)
    denom = n * (n - 1)
    if denom == 0:
        return 0

    g_eff = 0
    for source, targets in nx.all_pairs_dijkstra_path(G):
        for target, path in targets.items():
            # a one-node path is the source itself: no edge, no contribution
            if len(path) <= 1:
                continue

            d = 0
            for hop in range(len(path) - 1):
                w = G.get_edge_data(path[hop], path[hop + 1])["value"]
                d += w / (hop + 1)

            # guard: all-zero edge values would otherwise divide by zero
            if d != 0:
                g_eff += 1 / d

    return g_eff / denom
    
def local_efficiency(G):
    """
    Mean of global_efficiency over the neighbourhood subgraph of every node.

    For each node ``v`` the subgraph induced by ``G[v]`` (the neighbours of
    ``v``) is scored with global_efficiency; the scores are averaged over all
    nodes of ``G``.

    Returns 0 for an empty graph instead of dividing by zero, consistent with
    global_efficiency returning 0 when there are no node pairs.
    """
    n = len(G)
    if n == 0:
        return 0
    efficiency_list = (global_efficiency(G.subgraph(G[v])) for v in G)
    return sum(efficiency_list) / n

In [45]:
from itertools import permutations

def global_efficiency_weighted(G):
    """
    Global efficiency of ``G`` with the "value" edge attribute as edge length.

    Averages ``1 / distance(u, v)`` over all ordered pairs of distinct nodes,
    where the distance is the Dijkstra shortest-path length on "value".
    Pairs at distance 0, and pairs with no connecting path, contribute 0.

    Returns 0 for graphs with fewer than two nodes.
    """
    n = len(G)
    denom = n * (n - 1)
    if denom == 0:
        return 0

    # all_pairs_dijkstra yields (node, (distances, paths)) lazily; it has to be
    # materialised into a dict before it can be indexed by node.
    shortest_paths = dict(nx.all_pairs_dijkstra(G, weight = 'value'))

    total = 0
    for u, v in permutations(G, 2):
        # unreachable targets are absent from the distance dict
        dist = shortest_paths[u][0].get(v, 0)
        if dist != 0:
            total += 1. / dist

    return total / denom

def local_efficiency_weighted(G):
    """
    Mean of global_efficiency_weighted over the neighbourhood subgraph of every node.

    For each node ``v`` the subgraph induced by ``G[v]`` (the neighbours of
    ``v``) is scored with global_efficiency_weighted; the scores are averaged
    over all nodes of ``G``.

    Returns 0 for an empty graph instead of dividing by zero, consistent with
    global_efficiency_weighted returning 0 when there are no node pairs.
    """
    n = len(G)
    if n == 0:
        return 0
    efficiency_list = (global_efficiency_weighted(G.subgraph(G[v])) for v in G)
    return sum(efficiency_list) / n

In [46]:
# Analysis configuration.
# NOTE(review): data_source is not read by any cell visible in this notebook.
data_source = "dpli"
# Maps the label token of a result file name to the numeric class stored in
# the results table.
fatigue_labels = {"al": 0, "fa":1}

# Connectivity method(s) and frequency bands to process; the processing loop
# below only uses methods[0].
methods = ['dpli']
freqs = ['delta', 'theta', 'alpha', 'beta', 'gamma']

# Fetch the fsaverage data set and use its parent directory as subjects_dir.
fs_dir = mne.datasets.fetch_fsaverage(verbose=True)
subjects_dir = os.path.dirname(fs_dir)

# Read the 'aparc' parcellation labels of fsaverage; their names become the
# row/column names of every connectivity matrix loaded later.
labels = mne.read_labels_from_annot('fsaverage', parc='aparc',
                                            subjects_dir=subjects_dir, verbose=False)
# Drop the last label. NOTE(review): assumes the final entry is the "unknown"
# label -- confirm the ordering returned by read_labels_from_annot.
labels.pop(-1) # remove the unknown label
label_names = [label.name for label in labels]

0 files missing from root.txt in C:\Users\work\mne_data\MNE-fsaverage-data
0 files missing from bem.txt in C:\Users\work\mne_data\MNE-fsaverage-data\fsaverage


In [47]:
# Per-file results, accumulated as parallel lists (one entry per processed
# file); they are combined into a DataFrame in a later cell.
l_method =[]
l_pid = []
l_label = []
l_local_eff = []
l_global_eff = []
l_freq = []
for freq in freqs:
    # NOTE: the name `filter` shadows the Python builtin for the rest of the notebook.
    filter = {"method": methods[0], "freq": freq}
    files = get_files("output/dpli/", filter)
    print("method: {}, freq: {}, files: {}".format(methods[0], freq, len(files)))
    print("progress:")
    for i, f in enumerate(files):
        print(i, end=",")
        metadata = parse_filename(f)
        data = read_file(f)
        # Name rows/columns after the parcellation labels, then unstack into
        # one (source, target, value) row per matrix cell.
        # NOTE(review): assumes each file holds a square matrix with one
        # row/column per entry of label_names -- confirm.
        df_corr = pd.DataFrame(data, columns = label_names, index = label_names)
        df_pair = df_corr.stack().reset_index()
        df_pair.columns = ["source", "target", "value"]
        # links = lower_to_links(data)
        # links = conn_to_links(data)
        
        # collapse region pairs into lobe pairs (mean value per lobe pair)
        links = feature_extraction_lobe_level(df_pair)
        
        # keep the 5% largest lobe-to-lobe values (the 0.05 fraction below)
        links = links.sort_values(by="value", ascending=False)
        links_filtered = links.iloc[:int(len(links)*0.05)]
        
        # build a graph from the retained lobe pairs, keeping "value" on each edge
        G=nx.from_pandas_edgelist(links_filtered, 'source_lobe', 'target_lobe', edge_attr=['value'])
        # local_eff = nx.algorithms.local_efficiency(G)
        # global_eff = nx.algorithms.global_efficiency(G)
        # use the notebook's own efficiency definitions instead of networkx's
        local_eff = local_efficiency(G)
        global_eff = global_efficiency(G)

        l_method.append(methods[0])
        l_pid.append(metadata["pId"])
        # numeric class looked up from the file name's label token
        l_label.append(fatigue_labels[metadata["label"]])
        l_freq.append(metadata["freq"])
        l_local_eff.append(local_eff)
        l_global_eff.append(global_eff)

method: dpli, freq: delta, files: 96
progress:
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,method: dpli, freq: theta, files: 96
progress:
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,method: dpli, freq: alpha, files: 96
progress:
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,method: dpli, freq: beta,

In [50]:
# Combine the per-file result lists into one table: one row per processed
# file (method, participant id, numeric label, frequency band, efficiencies).
df_res = pd.DataFrame({"method": l_method, 
                        "pid": l_pid, 
                        "label": l_label, 
                        "freq": l_freq, 
                        "local_eff": l_local_eff, 
                        "global_eff": l_global_eff})
df_res.head()

Unnamed: 0,method,pid,label,freq,local_eff,global_eff
0,dpli,10,0,delta,0.992678,1.912495
1,dpli,10,1,delta,0.0,0.864649
2,dpli,11,0,delta,0.855736,1.781931
3,dpli,11,1,delta,0.950007,1.847283
4,dpli,12,0,delta,0.0,1.492197


In [56]:
# Display the results table (the notebook shows a truncated view).
df_res

Unnamed: 0,method,pid,label,freq,local_eff,global_eff
0,dpli,10,0,delta,0.992678,1.912495
1,dpli,10,1,delta,0.000000,0.864649
2,dpli,11,0,delta,0.855736,1.781931
3,dpli,11,1,delta,0.950007,1.847283
4,dpli,12,0,delta,0.000000,1.492197
...,...,...,...,...,...,...
475,dpli,7,1,gamma,2.043349,2.113698
476,dpli,8,0,gamma,0.764995,1.884606
477,dpli,8,1,gamma,2.029174,2.126209
478,dpli,9,0,gamma,2.004403,2.124989


# Paired t-test

In [51]:
def t_test(df, freq, feature = "local_eff"):
    """
    Paired t-test of ``feature`` between label 0 and label 1 within one frequency band.

    df: DataFrame with columns "freq", "label" and ``feature``; normally also "pid".
    freq: value of the "freq" column to select.
    feature: name of the column to compare.

    When a "pid" column is present, samples are paired by participant (and by
    order of occurrence when a participant has several rows per label), and
    participants lacking either label are left out. ttest_rel pairs its
    inputs by position, so relying on row order alone silently mis-pairs
    participants whenever the two label subsets are ordered differently.
    Without a "pid" column the rows are paired by position.

    Returns (t, p) from scipy.stats.ttest_rel.
    """
    df_freq = df[df["freq"] == freq]
    df1 = df_freq[df_freq["label"] == 0]
    df2 = df_freq[df_freq["label"] == 1]

    if "pid" in df_freq.columns and feature != "pid":
        # k-th row of a participant under label 0 pairs with the k-th row of
        # the same participant under label 1
        keys = ["pid", "_occurrence"]
        left = df1.assign(_occurrence=df1.groupby("pid").cumcount())[keys + [feature]]
        right = df2.assign(_occurrence=df2.groupby("pid").cumcount())[keys + [feature]]
        paired = left.merge(right, on=keys, suffixes=("_0", "_1"))
        sample0, sample1 = paired[feature + "_0"], paired[feature + "_1"]
    else:
        sample0, sample1 = df1[feature], df2[feature]

    t, p = ttest_rel(sample0, sample1)
    return t, p

In [52]:
# Run the paired t-test on both efficiency measures for every frequency band
# and print the t statistic and p-value of each.
for freq in freqs:
    print("------freq: {} --------------".format(freq))
    test_res_local_eff = t_test(df_res, freq, "local_eff")
    test_res_global_eff = t_test(df_res, freq, "global_eff")
    # each result is a (t, p) tuple
    print("local_eff: t={}, p={}".format(test_res_local_eff[0], test_res_local_eff[1]))
    print("global_eff: t={}, p={}".format(test_res_global_eff[0], test_res_global_eff[1]))


------freq: delta --------------
local_eff: t=-1.0571835467832718, p=0.29583150594655366
global_eff: t=-0.30900756006345564, p=0.7586814585470651
------freq: theta --------------
local_eff: t=-1.1277005735517909, p=0.2651705239403596
global_eff: t=-1.3553161821978634, p=0.18179476308600406
------freq: alpha --------------
local_eff: t=-0.5159372737050222, p=0.6083170108742559
global_eff: t=-0.26827301586477675, p=0.7896631639151953
------freq: beta --------------
local_eff: t=0.8945101343336356, p=0.37560708440397905
global_eff: t=0.8837516180526, p=0.3813287341535623
------freq: gamma --------------
local_eff: t=0.47623094806346894, p=0.6361163185396693
global_eff: t=0.3750241182864998, p=0.7093295790447243


In [53]:

# Split the delta-band rows by label (0 vs 1) for a closer look at the means.
# This repeats the selection done inside t_test, with the band hard-coded.
df_freq = df_res[df_res["freq"] == "delta"]
df1 = df_freq[df_freq["label"] == 0]
df2 = df_freq[df_freq["label"] == 1]

In [55]:
# Mean efficiency per label in the delta band: label 0 first, then label 1.
print(df1['local_eff'].mean(), df2['local_eff'].mean())
print(df1['global_eff'].mean(), df2['global_eff'].mean())


0.5541215380813288 0.7042020193337987
1.7190532979281261 1.7350600757031458
