# Model input connectivity format preparation 

In [1]:
import mne      # toolbox for analyzing and visualizing EEG data
import os       # using operating system dependent functionality (folders)
import pandas as pd # data analysis and manipulation
import numpy as np    # numerical computing (manipulating and performing operations on arrays of data)
import copy     # Can Copy and Deepcopy files so original file is untouched.
from ipywidgets import IntSlider, Output
import ipywidgets as widgets
from IPython.display import display
import matplotlib.pyplot as plt
from math import nan
from scipy import stats
from mne_connectivity import spectral_connectivity_epochs
from mne_connectivity.viz import plot_sensors_connectivity
import pyvista 
import mayavi
import vtk

import sys
sys.path.insert(0, '../eegyolk') # path to helper functions
import helper_functions as hf # library useful for eeg and erp data cleaning
#import initialization_functions #library to import data
import epod_helper

<font color='blue'>
REVIEW: It should now be possible to install `eegyolk`, so we no longer need to
manipulate `sys.path`.</font>

In [2]:
# Load the recording metadata table; a comma is already the default separator.
metadata = pd.read_csv('metadata.csv')

In [3]:
# Show the metadata table as loaded from the CSV file.
metadata

Unnamed: 0,eeg_file,ParticipantID,test,sex,age_months,age_months_days,dyslexic_parent,Group_AccToParents,path_eeg,path_epoch,path_eventmarkers,epoch_file
0,101a,101,a,m,20,20;22,m,At risk,F:\Stage\ePODIUM\Data\ePodium_projectfolder\Da...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ep...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ev...,101a_epo.fif
1,101b,101,b,m,23,23;16,m,At risk,F:\Stage\ePODIUM\Data\ePodium_projectfolder\Da...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ep...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ev...,101b_epo.fif
2,102a,102,a,f,20,20;27,Nee,Control,F:\Stage\ePODIUM\Data\ePodium_projectfolder\Da...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ep...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ev...,102a_epo.fif
3,102b,102,b,f,23,23;16,Nee,Control,F:\Stage\ePODIUM\Data\ePodium_projectfolder\Da...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ep...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ev...,102b_epo.fif
4,103a,103,a,f,20,20;23,m,At risk,F:\Stage\ePODIUM\Data\ePodium_projectfolder\Da...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ep...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ev...,103a_epo.fif
...,...,...,...,...,...,...,...,...,...,...,...,...
173,194a,194,a,f,19,19;0,f,At risk,F:\Stage\ePODIUM\Data\ePodium_projectfolder\Da...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ep...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ev...,194a_epo.fif
174,194b,194,b,f,22,22;3,f,At risk,F:\Stage\ePODIUM\Data\ePodium_projectfolder\Da...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ep...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ev...,194b_epo.fif
175,195a,195,a,f,18,18;10,f,At risk,F:\Stage\ePODIUM\Data\ePodium_projectfolder\Da...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ep...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ev...,195a_epo.fif
176,211a,211,a,m,18,18;27,m,At risk,F:\Stage\ePODIUM\Data\ePodium_projectfolder\Da...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ep...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ev...,211a_epo.fif


In [4]:
# Recordings to exclude, identified by the value in the `eeg_file` column.
drop_files = [
    "113a", "107b (deel 1+2)", "132a", "121b(2)", "113b", "107b (deel 3+4)",
    "147a", "121a", "134a", "143b", "121b(1)", "145b", "152a", "184a", "165a",
    "151a", "163a", "179a", "179b", "182b", "186a", "193b",
]

# Keep a row only when it is not excluded and its `test` value is not "b".
is_excluded = metadata['eeg_file'].isin(drop_files)
is_test_b = metadata['test'] == "b"
# reset_index() renumbers the rows from 0 and keeps the old labels in an
# `index` column.
metadata = metadata[~(is_excluded | is_test_b)].reset_index()

In [5]:
# Show the metadata table after the exclusions above have been applied.
metadata

Unnamed: 0,index,eeg_file,ParticipantID,test,sex,age_months,age_months_days,dyslexic_parent,Group_AccToParents,path_eeg,path_epoch,path_eventmarkers,epoch_file
0,0,101a,101,a,m,20,20;22,m,At risk,F:\Stage\ePODIUM\Data\ePodium_projectfolder\Da...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ep...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ev...,101a_epo.fif
1,2,102a,102,a,f,20,20;27,Nee,Control,F:\Stage\ePODIUM\Data\ePodium_projectfolder\Da...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ep...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ev...,102a_epo.fif
2,4,103a,103,a,f,20,20;23,m,At risk,F:\Stage\ePODIUM\Data\ePodium_projectfolder\Da...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ep...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ev...,103a_epo.fif
3,6,104a,104,a,m,18,18;12,f,At risk,F:\Stage\ePODIUM\Data\ePodium_projectfolder\Da...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ep...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ev...,104a_epo.fif
4,8,105a,105,a,f,17,17;9,f,At risk,F:\Stage\ePODIUM\Data\ePodium_projectfolder\Da...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ep...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ev...,105a_epo.fif
...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,171,193a,193,a,f,20,20;2,mf,At risk,F:\Stage\ePODIUM\Data\ePodium_projectfolder\Da...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ep...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ev...,193a_epo.fif
86,173,194a,194,a,f,19,19;0,f,At risk,F:\Stage\ePODIUM\Data\ePodium_projectfolder\Da...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ep...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ev...,194a_epo.fif
87,175,195a,195,a,f,18,18;10,f,At risk,F:\Stage\ePODIUM\Data\ePodium_projectfolder\Da...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ep...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ev...,195a_epo.fif
88,176,211a,211,a,m,18,18;27,m,At risk,F:\Stage\ePODIUM\Data\ePodium_projectfolder\Da...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ep...,F:\Stage\ePODIUM\Data\ePodium_projectfolder\ev...,211a_epo.fif



def read_filtered_data(metadata):
    """Read the epochs file referenced by every row of ``metadata``.

    Parameters
    ----------
    metadata : pandas.DataFrame
        Table with a ``path_epoch`` column (directory) and an ``epoch_file``
        column (file name); the two are joined to build each file path.

    Returns
    -------
    list
        One object returned by ``mne.read_epochs(..., preload=False)`` per
        row, in row order.
    """
    def _load(row):
        # Announce the file before reading it, then read without preloading.
        print(f"Checking out file: {row['epoch_file']}")
        location = os.path.join(row['path_epoch'], row['epoch_file'])
        return mne.read_epochs(location, preload=False)

    return [_load(row) for _, row in metadata.iterrows()]

<font color='blue'>REVIEW: This function is also defined in the data_analysis notebook.
Please move it to a shared module. Also, please limit the output of the cell below if possible.</font>

In [7]:
# Read the epochs file of every remaining metadata row (the reader logs each file).
epochs = read_filtered_data(metadata)


Checking out file: 101a_epo.fif
Reading F:\Stage\ePODIUM\Data\ePodium_projectfolder\epochs\101a_epo.fif ...
    Found the data of interest:
        t =    -299.80 ...     700.20 ms
        0 CTF compensation matrices available
Not setting metadata
2266 matching events found
No baseline correction applied
0 projection items activated
Checking out file: 102a_epo.fif
Reading F:\Stage\ePODIUM\Data\ePodium_projectfolder\epochs\102a_epo.fif ...
    Found the data of interest:
        t =    -299.80 ...     700.20 ms
        0 CTF compensation matrices available
Not setting metadata
2389 matching events found
No baseline correction applied
0 projection items activated
Checking out file: 103a_epo.fif
Reading F:\Stage\ePODIUM\Data\ePodium_projectfolder\epochs\103a_epo.fif ...
    Found the data of interest:
        t =    -299.80 ...     700.20 ms
        0 CTF compensation matrices available
Not setting metadata
1167 matching events found
No baseline correction applied
0 projection items activ

        0 CTF compensation matrices available
Not setting metadata
2440 matching events found
No baseline correction applied
0 projection items activated
Checking out file: 131a_epo.fif
Reading F:\Stage\ePODIUM\Data\ePodium_projectfolder\epochs\131a_epo.fif ...
    Found the data of interest:
        t =    -299.80 ...     700.20 ms
        0 CTF compensation matrices available
Not setting metadata
2440 matching events found
No baseline correction applied
0 projection items activated
Checking out file: 133a_epo.fif
Reading F:\Stage\ePODIUM\Data\ePodium_projectfolder\epochs\133a_epo.fif ...
    Found the data of interest:
        t =    -299.80 ...     700.20 ms
        0 CTF compensation matrices available
Not setting metadata
2440 matching events found
No baseline correction applied
0 projection items activated
Checking out file: 135a_epo.fif
Reading F:\Stage\ePODIUM\Data\ePodium_projectfolder\epochs\135a_epo.fif ...
    Found the data of interest:
        t =    -299.80 ...     700.2

Checking out file: 160a (2)_epo.fif
Reading F:\Stage\ePODIUM\Data\ePodium_projectfolder\epochs\160a (2)_epo.fif ...
    Found the data of interest:
        t =    -299.80 ...     700.20 ms
        0 CTF compensation matrices available
Not setting metadata
2440 matching events found
No baseline correction applied
0 projection items activated
Checking out file: 160a_epo.fif
Reading F:\Stage\ePODIUM\Data\ePodium_projectfolder\epochs\160a_epo.fif ...
    Found the data of interest:
        t =    -299.80 ...     700.20 ms
        0 CTF compensation matrices available
Not setting metadata
2440 matching events found
No baseline correction applied
0 projection items activated
Checking out file: 161a_epo.fif
Reading F:\Stage\ePODIUM\Data\ePodium_projectfolder\epochs\161a_epo.fif ...
    Found the data of interest:
        t =    -299.80 ...     700.20 ms
        0 CTF compensation matrices available
Not setting metadata
2440 matching events found
No baseline correction applied
0 projection ite

        0 CTF compensation matrices available
Not setting metadata
2440 matching events found
No baseline correction applied
0 projection items activated
Checking out file: 178a_epo.fif
Reading F:\Stage\ePODIUM\Data\ePodium_projectfolder\epochs\178a_epo.fif ...
    Found the data of interest:
        t =    -299.80 ...     700.20 ms
        0 CTF compensation matrices available
Not setting metadata
2440 matching events found
No baseline correction applied
0 projection items activated
Checking out file: 180a_epo.fif
Reading F:\Stage\ePODIUM\Data\ePodium_projectfolder\epochs\180a_epo.fif ...
    Found the data of interest:
        t =    -299.80 ...     700.20 ms
        0 CTF compensation matrices available
Not setting metadata
2440 matching events found
No baseline correction applied
0 projection items activated
Checking out file: 181a_epo.fif
Reading F:\Stage\ePODIUM\Data\ePodium_projectfolder\epochs\181a_epo.fif ...
    Found the data of interest:
        t =    -299.80 ...     700.2

<font color='blue'>REVIEW: This looks like an extended version of `data_connectivity()`
from the data_analysis notebook. Please put both in the same module and reuse the code in both notebooks.</font>

In [1]:

def input_connectivity_prep(epochs, metadata, fmin=4., fmax=9., tmin=0.0):
    """Build a table of connectivity values for a set of recordings.

    For each row position ``i`` of ``metadata`` the connectivity of
    ``epochs[i]`` is computed with ``spectral_connectivity_epochs`` (method
    ``'pli'``, multitaper mode, averaged over the ``fmin``-``fmax`` band),
    reshaped into a single row, and labelled with that row's ``eeg_file`` and
    ``Group_AccToParents`` values.

    Parameters
    ----------
    epochs : sequence
        Epochs objects, positionally aligned with the rows of ``metadata``.
    metadata : pandas.DataFrame
        Must contain the columns ``eeg_file`` and ``Group_AccToParents``.
        Rows are addressed by position, so the index does not need to be
        0..n-1.
    fmin, fmax : float
        Frequency band passed to ``spectral_connectivity_epochs``
        (defaults 4.0 and 9.0).
    tmin : float
        Start time passed to ``spectral_connectivity_epochs``; the default
        0.0 excludes the baseline period.

    Returns
    -------
    pandas.DataFrame
        The per-recording rows stacked in order (each keeps row label 0),
        with the connectivity columns followed by ``eeg_file`` and
        ``Group_AccToParents``. Empty when ``metadata`` has no rows.

    Raises
    ------
    IndexError
        If ``epochs`` has fewer items than ``metadata`` has rows.
    """
    rows = []

    for i in range(len(metadata)):
        print(f"Calculating connectivity of file {i} \n", end = '')

        # calculate connectivity over all epochs of this recording
        con = spectral_connectivity_epochs(
            epochs[i], method='pli', mode='multitaper', fmin=fmin, fmax=fmax,
            faverage=True, tmin=tmin, mt_adaptive=False, n_jobs=1)

        # NOTE(review): assumes con.get_data() returns a 2-D array with one
        # row per channel pair -- confirm for the installed mne_connectivity.
        epochrow = pd.DataFrame(con.get_data())
        # drop the rows that are entirely zero, then transpose so the
        # remaining values become the columns of a single row
        epochrow = epochrow.loc[~(epochrow == 0).all(axis=1)].transpose()
        # positional lookup: correct even when the metadata index was not reset
        epochrow['eeg_file'] = metadata['eeg_file'].iloc[i]
        epochrow['Group_AccToParents'] = metadata['Group_AccToParents'].iloc[i]
        rows.append(epochrow)

    # pd.concat raises on an empty list; keep returning an empty frame instead
    if not rows:
        return pd.DataFrame()

    # Concatenate once: DataFrame.append was removed in pandas 2.0 and copied
    # the whole accumulated table on every iteration.
    return pd.concat(rows)
    

In [None]:
# Compute the connectivity table for every recording listed in `metadata`.
dfcon = input_connectivity_prep(epochs, metadata)
# REVIEW: Same comment as above: reduce the output and do not commit it with the notebook.

In [14]:
#dfcon = dfcon.loc[dfcon['test'] == 'a']
# Remove the `eeg_file` label column from the connectivity table.
dfcon = dfcon.drop(columns=['eeg_file'])

In [15]:
# Encode the group label as an integer: 1 for 'At risk', 0 for anything else.
# Re-running this cell on the already encoded column turns every value into 0.
is_at_risk = dfcon['Group_AccToParents'] == 'At risk'
dfcon['Group_AccToParents'] = np.where(is_at_risk, 1, 0)

In [16]:
# Show the connectivity table with the encoded group column.
dfcon

Unnamed: 0,32,64,65,96,97,98,128,129,130,131,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,Group_AccToParents
0,0.152030,0.069506,0.028244,0.216019,0.236540,0.110768,0.146072,0.234775,0.123786,0.224846,...,0.092895,0.109223,0.091130,0.110989,0.126434,0.059135,0.116726,0.058032,0.164387,1
0,0.049602,0.027627,0.060067,0.021557,0.016116,0.090414,0.022394,0.053370,0.050649,0.114483,...,0.114692,0.096693,0.043533,0.169527,0.042905,0.028882,0.031812,0.038928,0.098577,0
0,0.380034,0.336761,0.322622,0.404456,0.434876,0.360326,0.452442,0.464439,0.064267,0.492716,...,0.339332,0.338046,0.392888,0.127249,0.395458,0.368038,0.422022,0.341902,0.328620,1
0,0.044057,0.063730,0.048361,0.123975,0.148566,0.029508,0.107992,0.155943,0.071107,0.087090,...,0.045082,0.057582,0.068238,0.159016,0.117418,0.093033,0.115574,0.086475,0.150410,1
0,0.137384,0.124695,0.082479,0.244021,0.198389,0.036115,0.171791,0.170327,0.102733,0.158370,...,0.027086,0.053197,0.029771,0.063202,0.110786,0.061249,0.121279,0.102245,0.085652,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,0.066393,0.027664,0.032787,0.065574,0.107992,0.030533,0.080123,0.150205,0.065164,0.136680,...,0.021107,0.042828,0.038525,0.177869,0.128074,0.068852,0.125820,0.072951,0.175820,1
0,0.126844,0.023566,0.020287,0.169672,0.138115,0.068033,0.156762,0.223156,0.099385,0.187500,...,0.070287,0.071107,0.085656,0.211066,0.145902,0.079508,0.137295,0.077049,0.206352,1
0,0.105123,0.071107,0.068033,0.112910,0.087705,0.031762,0.079713,0.110246,0.099590,0.098566,...,0.155738,0.183811,0.116598,0.162090,0.189959,0.080123,0.158402,0.123156,0.202049,1
0,0.121926,0.013934,0.025000,0.178689,0.180533,0.065779,0.159016,0.177664,0.090164,0.148770,...,0.071926,0.055328,0.032787,0.115984,0.097131,0.053689,0.102869,0.072131,0.128484,1


In [17]:

dfcon.to_csv('df_connectivity.csv', index=False) # save the dataframe as CSV, without the row index