## Reading Preprocessed EEG Data

In [1]:
import numpy as np
import scipy
from scipy.io import loadmat
import mne, glob
from mne_features.feature_extraction import extract_features
import pandas as pd
%matplotlib inline

#### reading channel names from the channel-order xlsx file (make sure you have that file in the same folder btw, I think it's originally in the "SEED_EEG file")

In [2]:
import xlrd
channels=pd.read_excel("channel-order.xlsx")
#get first column
channelNames=channels.iloc[:,0]
#make this into a normal list
channelNames=np.ndarray.tolist(pd.Series.to_numpy(channelNames))
#insert the channel name "FP1" at front becuz it got left out
channelNames.insert(0,"FP1")

#### group all the .mat files tgt

In [3]:
matfiles = glob.glob('*.mat')
file = {}

#### load 15 files into dictionary (now load 2 files only for testing)

In [4]:
for i in range(0,2):
    file[i] = loadmat(matfiles[i])

#### Creating info about the EEG data, frankly I think you can just use this for all data when epoching

In [5]:
#mne.create_info(channel names (list of em', then sampling rate in Hz, then type of data this case is 'eeg')
info1=mne.create_info(channelNames,200,'eeg')

#### Removing useless keys

In [6]:
for i in range(0,2):
    del file[i]["__header__"]
    del file[i]["__version__"]
    del file[i]["__globals__"]

#### making a MNE readable array (meaning that the lib mne can work with this array) for clip 1 of file 1

In [7]:
for i in range(0,2):
    simulated_raw = mne.io.RawArray(file[i]['ww_eeg1'],info1)

Creating RawArray with float64 data, n_channels=62, n_times=47001
    Range : 0 ... 47000 =      0.000 ...   235.000 secs
Ready.
Creating RawArray with float64 data, n_channels=62, n_times=47001
    Range : 0 ... 47000 =      0.000 ...   235.000 secs
Ready.


#### creating equal sized epochs (time slices) for the data array we just created with duration of 1 second each "time slice" (could play around with that)

In [8]:
epoch = {}

In [9]:
for i in range(0,2):
    epoch[i] = mne.make_fixed_length_epochs(simulated_raw, duration=1, preload=True)

Not setting metadata
235 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 235 events and 200 original time points ...
0 bad epochs dropped
Not setting metadata
235 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 235 events and 200 original time points ...
0 bad epochs dropped


#### Turning back the mne format data into a ndarray (idk why but the extract features can only work with ndarrays)

In [10]:
epoch_array = {}

In [11]:
for i in range(0,2):
    epoch_array[i]=mne.Epochs.get_data(epoch[i])

#### Defining our own method for calculating differential entropy (might look different from the extracted features folder... maybe because it's gone through LDS?)

In [12]:
def compute_diffEnt(data):
    return scipy.stats.differential_entropy(data, axis=-1)

#### testing out extract_features with spectral entropy and we get an array that is simillar to the extracted features folder, but instead of 5 channels seems to only have 1 channels data (i.e. no beta gamma... etc.)

In [13]:
spectral_entropy = {}

In [14]:
selected_funcs = ['spect_entropy']
#200=smapling rate

for i in range(0,2):
    spectral_entropy[i] = extract_features(epoch_array[i],200,selected_funcs)
    print(pd.DataFrame(spectral_entropy[i]))

           0         1         2         3         4         5         6   \
0    3.580687  2.473215  4.613477  4.257474  4.244784  3.127960  4.692558   
1    2.462745  1.871569  3.493227  2.429035  4.270925  1.591576  3.158240   
2    4.811269  3.681855  5.284717  5.379361  4.788185  2.898684  4.366586   
3    5.172222  3.717879  5.703191  4.322530  3.761453  3.108639  3.962463   
4    2.811267  2.757529  3.706583  2.875836  3.636450  2.226725  3.040106   
..        ...       ...       ...       ...       ...       ...       ...   
230  5.857037  4.087057  5.662533  5.051508  3.958503  3.841689  5.665413   
231  5.955349  4.764758  4.753502  5.259684  4.326685  3.694039  5.165324   
232  5.122041  3.434168  4.818136  5.058526  3.173762  2.346565  4.335466   
233  5.507974  3.884937  5.180941  4.980429  5.075166  4.315954  4.785507   
234  6.031461  5.732160  6.093246  5.878301  5.215008  4.742081  5.561444   

           7         8         9   ...        52        53        54  \
0  

#### now we try it with our own defined function

In [15]:
differential_entropy = {}

In [16]:
selected_funcs = [('diffEnt', compute_diffEnt)]

for i in range(0,2):
    differential_entropy[i] = extract_features(epoch_array[i],200,selected_funcs)
    print(pd.DataFrame(differential_entropy[i]))

           0         1         2         3         4         5         6   \
0    4.728001  4.499821  4.968635  4.359168  4.380459  4.641259  4.308297   
1    5.352316  5.059323  5.140080  4.808622  4.476690  5.111398  4.692262   
2    4.707177  4.465248  4.528724  3.966115  4.149418  4.991914  4.505734   
3    4.356139  4.250580  4.428819  3.871476  4.310108  4.588731  4.265849   
4    4.859594  4.705022  4.764772  4.436136  4.393325  5.206766  4.681145   
..        ...       ...       ...       ...       ...       ...       ...   
230  4.513406  4.273676  4.584351  4.138657  4.390382  4.279692  4.218632   
231  4.426386  4.011812  4.475379  4.152102  4.104084  4.692884  4.416227   
232  4.255858  3.974260  4.531178  3.784958  4.139446  4.676390  4.325652   
233  4.281423  3.949277  4.648581  3.848982  4.010153  4.063748  4.021127   
234  4.142386  3.641504  4.444681  3.690856  3.791092  3.884043  3.877536   

           7         8         9   ...        52        53        54  \
0  

In [17]:
hjorth_mobility = {}

In [18]:
selected_funcs = [('hjorth_mobility_spect')]

for i in range(0,2):
    hjorth_mobility[i] = extract_features(epoch_array[i],200,selected_funcs)
    print(pd.DataFrame(hjorth_mobility[i]))

                0              1             2              3              4   \
0    610426.987394  193239.413213  1.986836e+06  348513.596901  284649.619313   
1    780180.078100  215907.941624  1.148550e+06  276083.556118  198658.108685   
2    640733.077349  223894.848157  8.648282e+05  196673.427519  178333.651581   
3    586251.940660  172509.641411  9.665396e+05  171621.628159  315957.168761   
4    572023.773446  221319.439112  1.062234e+06  156373.315422  186940.856362   
..             ...            ...           ...            ...            ...   
230  734497.382069  130722.587731  9.704141e+05  151651.146902  146012.284203   
231  785392.824860  189074.596479  7.246080e+05  182204.805923  204514.539142   
232  484062.825006  161126.119699  9.454983e+05  168830.073535  229418.150011   
233  789741.651041  159855.765832  1.076946e+06  169849.776901  201028.770719   
234  476698.031909  164031.464077  1.155323e+06  207436.588850  228401.743901   

                5          

## Label

In [31]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

In [32]:
def getExtracted_data():
    combined = np.zeros((1,311))
    matfiles = glob.glob('../ExtractedFeatures/*.mat')
    data= {}
    labels=loadmat(matfiles[45])
    matfiles.pop()
    label_list=np.ndarray.tolist(labels['label'])[0]
    rest_of_index=[2,3,4,5,6,7,8,9,10,11,12,13,14,15]
    for x in range(0,45,3):
        #initial DE data must be done with clip_label[0]
        used_data = loadmat(matfiles[x])['de_LDS1']
        _, num, _ = used_data.shape
        used_label = np.zeros(num,) + label_list[0]
        used_data = np.swapaxes(used_data, 0, 1)
        used_data = np.reshape(used_data, (num, -1))
        used_label=used_label.reshape(num,1)   

        oneCom = np.hstack((used_data,used_label))
        combined = np.vstack((combined,oneCom))
        for ind,i in enumerate(rest_of_index):
            used_data = loadmat(matfiles[x])['de_LDS' + str(i)]
            _, num, _ = used_data.shape
            used_label = np.zeros(num,) + label_list[ind+1]
            used_data = np.swapaxes(used_data, 0, 1)
            used_data = np.reshape(used_data, (num, -1))
            used_label=used_label.reshape(num,1)
            oneCom = np.hstack((used_data,used_label))
            combined = np.vstack((combined,oneCom))
        combined = np.delete(combined, 0, axis=0)
    return combined

In [33]:
data_x = getExtracted_data()

In [34]:
label = data_x[:, 310]
label = np.reshape(label, (50896, 1))
data_x = np.delete(data_x, 310, 1)

In [35]:
label_count = label.shape[0]
label_count

50896

In [36]:
a = np.zeros((label_count, 2), float)

In [37]:
label = np.hstack((label, a))
print(label)

[[ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 ...
 [-1.  0.  0.]
 [-1.  0.  0.]
 [-1.  0.  0.]]


In [38]:
for i in range(label_count):
    if np.array_equal(label[i],([1, 0, 0])) == True:
        label[i] = [1, 0, 0]
    if np.array_equal(label[i],([0, 0, 0])) == True:
        label[i] = [0, 1, 0]
    if np.array_equal(label[i],([-1, 0, 0])) == True:
        label[i] = [0, 0, 1]

In [39]:
print(label)

[[1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 ...
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]]


In [40]:
#repeat to get 235 rows of labels for every file
all_label = np.repeat(label, repeats=234, axis=0)

In [41]:
print(all_label)

[[1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 ...
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]]


#### Merging Arrays

In [45]:
files_zipping = {}

In [52]:
# array for all 15 matfiles
for i in range(0,2):
    files_zipping[i] = list(zip(spectral_entropy[i], differential_entropy[i], hjorth_mobility[i], all_label))
    print("file", i)
    print(pd.DataFrame(files_zipping[i]))

file 0
                                                     0  \
0    [3.580686520149464, 2.47321458596228, 4.613476...   
1    [2.462745161314667, 1.871568565987079, 3.49322...   
2    [4.8112685255351915, 3.6818553566846326, 5.284...   
3    [5.172222015240901, 3.7178788737211783, 5.7031...   
4    [2.811266832844941, 2.757528680647787, 3.70658...   
..                                                 ...   
230  [5.857036570883196, 4.087056587771543, 5.66253...   
231  [5.955349426213006, 4.764757559289417, 4.75350...   
232  [5.1220409938305025, 3.434168113233109, 4.8181...   
233  [5.507974369987037, 3.8849369782046774, 5.1809...   
234  [6.031460606093471, 5.732159740586171, 6.09324...   

                                                     1  \
0    [4.728000886521565, 4.499820662113705, 4.96863...   
1    [5.352316492953687, 5.059323141303814, 5.14008...   
2    [4.7071765304156425, 4.465247529756711, 4.5287...   
3    [4.356139135854431, 4.2505797279948805, 4.4288...   
4    [