In [132]:
import numpy as np
import mne
import matplotlib
%matplotlib inline
#%matplotlib qt
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import glob, os
import scipy
import pickle
import networkx as nx
import preprocessing as prep
from itertools import permutations
import pandas as pd
from tqdm import tqdm

from epoch_connectivity import ConnEEG as cnegg
from epoch_connectivity import ConnectionMatrices as cm
from epoch_connectivity import Connectomes as cntms

from epoch_connectivity.utils import FrequencyBand, only_EEG_channels

import mne_connectivity as mnecon

In [258]:
def get_column_name(measure_used, record_status, frequency_band, graph_measure=None):
    if measure_used is not 'wPLI':
        return measure_used+'_'+frequency_band+'_'+record_status+'_'+graph_measure
    else:
        return measure_used+'_'+frequency_band+'_'+record_status+'_'+'MEAN_SYNCH'
    
    
def r_nr_asarrays(column_name, full_dataframe):
    responders = np.array(full_dataframe.loc[full_dataframe["response"] == "R", column_name])
    non_responders = np.array(full_dataframe.loc[full_dataframe["response"] == "NR", column_name])
    return responders, non_responders

def get_reduced_df(full_dataframe, measure_used, record_status, graph_measure=None):
    columns_to_get = []
    for frequency_band in ['delta', 'theta', 'alpha', 'beta', 'broadband']:
        columns_to_get.append(get_column_name(measure_used=measure_used, graph_measure=graph_measure,
                                              record_status=record_status, frequency_band=frequency_band))
    columns_to_get.append('response')
    return full_dataframe[columns_to_get]

def get_plot_title(measure_used, record_status, graph_measure=None):
    g_measure_name = {'GE':'Global efficiency', 'MOD':'Modularity', 'GRC':'Global reaching centrality',
                      'DA':'Degree assortativity', 'AVGCC':'Average clustering coefficient', 'MEAN_SYCNH':'Mean synchronization'}
    rec_status_name = {'SLEEP':'asleep', 'AWAKE':'awake'}
    if measure_used is not 'wPLI':
        if record_status=='ASRATIO':
            return f'{g_measure_name[graph_measure]} ratio between awake and asleep EEG', f'{g_measure_name[graph_measure]} awake / {g_measure_name[graph_measure]} asleep'
        else:
            return g_measure_name[graph_measure]+' ('+measure_used+') while '+rec_status_name[record_status], g_measure_name[graph_measure]+' value'
    else:
        if record_status=='ASRATIO':
            return f'wPLI mean synchronization ratio between awake and asleep EEG', 'Mean wPLI awake / Mean wPLI asleep'
        else:
            return 'wPLI mean synchronization while '+rec_status_name[record_status], 'Mean wPLI value'
    
def plot_boxplot(full_dataframe, measure_used, record_status, directory_to_save=None, graph_measure=None, savefig=False):
    reduced_df = get_reduced_df(full_dataframe=full_dataframe, measure_used=measure_used, graph_measure=graph_measure,
                                record_status=record_status)
    bp = sns.boxplot(x="variable", y="value", hue="response", data=pd.melt(reduced_df, id_vars='response'))
    bp.set_xticklabels(['delta', 'theta', 'alpha', 'beta', 'broadband'])
    title, y_label = get_plot_title(measure_used=measure_used, record_status=record_status, graph_measure=graph_measure)
    plt.xlabel('Frequency bands')
    plt.ylabel(y_label)
    plt.title(title)
    if savefig:
        plt.savefig(directory_to_save)
        plt.close()
    return

### Code used to test the normality of the data

In [None]:
import numpy as np
import statsmodels.api as sm
import pylab

column = get_column_name(measure_used='wPLI', record_status='SLEEP', frequency_band='delta')#, graph_measure='GE')
a, b = r_nr_asarrays(column_name=column, full_dataframe=full_df)

#test = np.random.normal(0,1, 1000)

sm.qqplot(a, line='45')
pylab.show()

### Mann-Whitney U test and Boxplot for the networks thresholded with the surrogate data test
It is necessary to do the Mann-Whitney U test because the data is not normal, so the T-student is not a good option. Also, we test the ration between the measures between awake and asleep EEG.

In [260]:
response = pd.read_csv('response.csv', sep=';') # Read the response data
features = pd.read_csv('surrogate_08042022.csv', index_col=0) # Read the features dataframe

# To create columns with the ration among the features on AWAKE/SLEEP state
for measure in ['wPLI', 'PDC', 'DTF']:
    if measure == 'wPLI':
        for f_band in ['delta', 'theta', 'alpha', 'beta', 'broadband']:
            features[f'{measure}_{f_band}_ASRATIO_MEAN_SYNCH'] = features[f'{measure}_{f_band}_AWAKE_MEAN_SYNCH'].div(features[f'{measure}_{f_band}_SLEEP_MEAN_SYNCH'])
    else:
        for f_band in ['delta', 'theta', 'alpha', 'beta', 'broadband']:
            for graph_measures in ['GE', 'GRC', 'MOD', 'DA', 'AVGCC']:
                features[f'{measure}_{f_band}_ASRATIO_{graph_measures}'] = features[f'{measure}_{f_band}_AWAKE_{graph_measures}'].div(features[f'{measure}_{f_band}_SLEEP_{graph_measures}'])

full_df = pd.merge(features, response, on='patient', how='outer') # Join the two dataframes
full_df = full_df.dropna() # Drop the NaN values
full_df.loc[full_df["response"] == "RP", "response"] = "R"
full_df.loc[full_df["response"] == "PR", "response"] = "R"

In [262]:
# Perform the boxplots for all the measures and the t-student test
directory = 'C:/code_thesis/brain_connectivity_epilepsy/data_analysis/surrogate/'
t_test_df = {}
t_test_df['measure'], t_test_df['delta'], t_test_df['theta'], t_test_df['alpha'], t_test_df['beta'], t_test_df['broadband'] = [], [], [], [], [], []
for measure in ['wPLI', 'PDC', 'DTF']:
    if measure == 'wPLI':
        for record_status in ['AWAKE', 'SLEEP', 'ASRATIO']:
            t_test_df['measure'].append(measure+'_'+record_status+'_'+'MEAN_SYNCH')
            to_save = directory+'surrogate_'+measure+'_'+record_status+'.pdf'
            plot_boxplot(full_dataframe=full_df, measure_used=measure, record_status=record_status,
                             graph_measure=graph_measures, directory_to_save=to_save, savefig=True)
            for f_band in ['delta', 'theta', 'alpha', 'beta', 'broadband']:
                column = get_column_name(measure_used=measure, record_status=record_status, frequency_band=f_band)
                a, b = r_nr_asarrays(column_name=column, full_dataframe=full_df)
                t_test_df[f_band].append(scipy.stats.mannwhitneyu(a, b)[1])
    else:
        for record_status in ['AWAKE', 'SLEEP', 'ASRATIO']:
            for graph_measures in ['GE', 'GRC', 'MOD', 'DA', 'AVGCC']:
                t_test_df['measure'].append(measure+'_'+record_status+'_'+graph_measures)
                to_save = directory+'surrogate_'+measure+'_'+record_status+'_'+graph_measures+'.pdf'
                plot_boxplot(full_dataframe=full_df, measure_used=measure, record_status=record_status,
                             graph_measure=graph_measures, directory_to_save=to_save, savefig=True)
                for f_band in ['delta', 'theta', 'alpha', 'beta', 'broadband']:
                    column = get_column_name(measure_used=measure, record_status=record_status, frequency_band=f_band, graph_measure=graph_measures)
                    a, b = r_nr_asarrays(column_name=column, full_dataframe=full_df)
                    t_test_df[f_band].append(scipy.stats.mannwhitneyu(a, b)[1])
t_test_df = pd.DataFrame(t_test_df)
#t_test_df.to_csv(directory+'surrogate_ttest.csv', sep=';')
t_test_df.to_csv(directory+'surrogate_mannwhiteyu.csv', sep=';')

### Mann-Whitney U test and Boxplot for the networks not thresholded
It is necessary to do the Mann-Whitney U test because the data is not normal, so the T-student is not a good option. Also, we test the ration between the measures between awake and asleep EEG.

In [264]:
response = pd.read_csv('response.csv', sep=';') # Read the response data
features = pd.read_csv('no_thresh_08042022.csv', index_col=0) # Read the features dataframe

# To create columns with the ration among the features on AWAKE/SLEEP state
for measure in ['wPLI', 'PDC', 'DTF']:
    if measure == 'wPLI':
        for f_band in ['delta', 'theta', 'alpha', 'beta', 'broadband']:
            features[f'{measure}_{f_band}_ASRATIO_MEAN_SYNCH'] = features[f'{measure}_{f_band}_AWAKE_MEAN_SYNCH'].div(features[f'{measure}_{f_band}_SLEEP_MEAN_SYNCH'])
    else:
        for f_band in ['delta', 'theta', 'alpha', 'beta', 'broadband']:
            for graph_measures in ['GE', 'GRC', 'MOD', 'DA', 'AVGCC']:
                features[f'{measure}_{f_band}_ASRATIO_{graph_measures}'] = features[f'{measure}_{f_band}_AWAKE_{graph_measures}'].div(features[f'{measure}_{f_band}_SLEEP_{graph_measures}'])

full_df = pd.merge(features, response, on='patient', how='outer') # Join the two dataframes
full_df = full_df.dropna() # Drop the NaN values
full_df.loc[full_df["response"] == "RP", "response"] = "R"
full_df.loc[full_df["response"] == "PR", "response"] = "R"

In [265]:
# Perform the boxplots for all the measures and the t-student test
directory = 'C:/code_thesis/brain_connectivity_epilepsy/data_analysis/no_threshold/'
t_test_df = {}
t_test_df['measure'], t_test_df['delta'], t_test_df['theta'], t_test_df['alpha'], t_test_df['beta'], t_test_df['broadband'] = [], [], [], [], [], []
for measure in ['wPLI', 'PDC', 'DTF']:
    if measure == 'wPLI':
        for record_status in ['AWAKE', 'SLEEP', 'ASRATIO']:
            t_test_df['measure'].append(measure+'_'+record_status+'_'+'MEAN_SYNCH')
            to_save = directory+'surrogate_'+measure+'_'+record_status+'.pdf'
            plot_boxplot(full_dataframe=full_df, measure_used=measure, record_status=record_status,
                             graph_measure=graph_measures, directory_to_save=to_save, savefig=True)
            for f_band in ['delta', 'theta', 'alpha', 'beta', 'broadband']:
                column = get_column_name(measure_used=measure, record_status=record_status, frequency_band=f_band)
                a, b = r_nr_asarrays(column_name=column, full_dataframe=full_df)
                #t_test_df[f_band].append(scipy.stats.ttest_ind(a, b)[1])
                t_test_df[f_band].append(scipy.stats.mannwhitneyu(a, b)[1])
    else:
        for record_status in ['AWAKE', 'SLEEP', 'ASRATIO']:
            for graph_measures in ['GE', 'GRC', 'MOD', 'DA', 'AVGCC']:
                t_test_df['measure'].append(measure+'_'+record_status+'_'+graph_measures)
                to_save = directory+'surrogate_'+measure+'_'+record_status+'_'+graph_measures+'.pdf'
                plot_boxplot(full_dataframe=full_df, measure_used=measure, record_status=record_status,
                             graph_measure=graph_measures, directory_to_save=to_save, savefig=True)
                for f_band in ['delta', 'theta', 'alpha', 'beta', 'broadband']:
                    column = get_column_name(measure_used=measure, record_status=record_status, frequency_band=f_band, graph_measure=graph_measures)
                    a, b = r_nr_asarrays(column_name=column, full_dataframe=full_df)
                    #t_test_df[f_band].append(scipy.stats.ttest_ind(a, b)[1])
                    t_test_df[f_band].append(scipy.stats.mannwhitneyu(a, b)[1])
t_test_df = pd.DataFrame(t_test_df)
#t_test_df.to_csv(directory+'no_thresh_ttest.csv', sep=';')
t_test_df.to_csv(directory+'no_thresh_mannwhitneyu.csv', sep=';')

### Chi-squared test for the 

In [221]:
from sklearn.feature_selection import f_classif

In [220]:
response = pd.read_csv('response.csv', sep=';') # Read the response data
features = pd.read_csv('surrogate_08042022.csv', index_col=0) # Read the features dataframe
full_df = pd.merge(features, response, on='patient', how='outer') # Join the two dataframes
full_df = full_df.dropna() # Drop the NaN values

response = full_df['response'].copy()
full_df.drop(columns=['response'], inplace=True)

response.loc[response["response"] == "RP", "response"] = 1
response.loc[response["response"] == "PR", "response"] = 1

f_classif(full_df, response)

ValueError: could not convert string to float: 'NR'