# Hypothesis 1

There is a relationship between the EEG signal and the three levels of stress.

For each (person, task) pair, the EEG signals form three distinct groups, one for each of the three levels of stress.

In [18]:
from utils import *
import pandas as pd
import mne
from glob import glob
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

In [28]:
# Marker names have the form "<task>,<stress level>,<outcome>".
# Outcome legend:
#   start : a question begins
#   T     : the answer was correct
#   F     : the answer was incorrect
#   S     : no answer was given in time
#   O     : an unexpected key was pressed
# Index 0 holds the placeholder value 0; the list ends with the fixation marker.
_stress_levels = ('LowStress', 'MildStress', 'HigherStress')
_outcomes = ('start', 'T', 'F', 'S', 'O')

marker_names = [0]
marker_names += [
    f'math,{level},{outcome}'
    for level in _stress_levels
    for outcome in _outcomes
]
marker_names.append('fixation,None,None')

In [32]:
# Load every recording of the selected task into a pandas DataFrame.
# Switch the task by (un)commenting one of the two lines below.
from pathlib import Path  # local import: OS-independent file-name handling

# task = "mathcontrol" 
task = "mathstress"

files = glob(f"data/*_{task}_*.csv")
names = []
pd_raws = []
raws = []
with tqdm(files) as pbar:
    for index, f in enumerate(pbar):
        # File names look like "<name>_<task>_<timestamp>.csv".
        # Split from the right so a subject name that itself contains "_"
        # still parses, and bind the unused parts to throwaway names so the
        # `task` selected above is not overwritten by the loop.
        name, _file_task, _t_stamp = Path(f).name.rsplit('_', 2)
        names.append(name)
        pbar.set_description(f"{index} {name}")
        # Read the Marker column as text before handing it to marker_converter.
        pd_raw = pd.read_csv(f, dtype={'Marker': str})
        pd_raw = marker_converter(pd_raw, marker_names)
        pd_raws.append(pd_raw)

# Summary table: position in `names` / `pd_raws` -> subject and source file.
print("index\tname\tfile_name")
print("="*30)
for index, (name, path) in enumerate(zip(names, files)):
    print(f"{index}\t{name}\t{path}")

  0%|          | 0/7 [00:00<?, ?it/s]

index	name	file_name
0	prin	data/prin_mathstress_2021-08-13-06.37.39.csv
1	pumpath	data/pumpath_mathstress_2021-08-12-03.22.27.csv
2	krittithee	data/krittithee_mathstress_2021-08-13-04.47.53.csv
3	nuttasit	data/nuttasit_mathstress_2021-08-12-08.09.53.csv
4	nutcha	data/nutcha_mathstress_2021-08-10-12.18.33.csv
5	surangrat	data/surangrat_mathstress_2021-08-12-05.08.24.csv
6	dusadee	data/dusadee_mathstress_2021-08-12-06.43.45.csv


In [44]:
# Convert each loaded DataFrame with dataframe_to_raw; `raws` keeps the same
# order as `names` and `pd_raws`.
# NOTE: `raw` deliberately stays bound after the loop (to the last converted
# recording); the following cells operate on that variable.
sampling_rate = 250  # Hz
raws = []
with tqdm(pd_raws) as progress:
    for index, frame in enumerate(progress):
        progress.set_description(f"{index} {names[index]}")
        raw = dataframe_to_raw(frame, sfreq=sampling_rate)
        raws += [raw]

  0%|          | 0/7 [00:00<?, ?it/s]

In [None]:

# Preprocess the recording currently bound to `raw`.
line_noise_freqs = [50, 100]  # Hz
raw.notch_filter(
    freqs=line_noise_freqs,
    filter_length='auto',
    phase='zero',
    verbose=False,
)  # Line power
raw.filter(l_freq=1, h_freq=None, verbose=False)  # Slow drift
# NOTE(review): presumably here so the notebook does not echo the return
# value of the call above.
pass
# ica = mne.preprocessing.ICA(n_components=8, max_iter='auto')
# ica.fit(raw, verbose=False)
# raw = ica.apply(raw, verbose=False)

In [None]:
# Read the events from the 'Marker' stim channel of `raw`, print how many
# questions of each stress level were asked and how they were answered, then
# keep only the events that mark a block start or a fixation.
#
# NOTE(review): `marker_idx` is not defined in the visible cells (presumably it
# comes from `utils`). It is used here as a sequence of marker names whose
# position is compared against the marker code stored in events[:, 2].
events = mne.find_events(raw, stim_channel='Marker', initial_event=True, verbose=False, uint_cast=False)

# NOTE: this rebinds `marker_names`, the name the loading cell passes to
# marker_converter, to a shorter list (no placeholder 0 and no 'O' markers).
marker_names = ['math,LowStress,start',
                'math,LowStress,T',
                'math,LowStress,F',
                'math,LowStress,S',
                'math,MildStress,start',
                'math,MildStress,T',
                'math,MildStress,F',
                'math,MildStress,S',
                'math,HigherStress,start',
                'math,HigherStress,T',
                'math,HigherStress,F',
                'math,HigherStress,S',
                'fixation,None,None']
interested_marker_names = ['math,LowStress,start','math,MildStress,start','math,HigherStress,start','fixation,None,None']

interested_markers = [marker_idx.index(name) for name in interested_marker_names]

for name in marker_names:
    if name not in marker_idx:
        continue
    # The first field is bound to a throwaway name so that the global `task`
    # (the selected task) is not overwritten by this report loop.
    _task, level, m = name.split(',')
    number = np.count_nonzero(events[:, 2] == marker_idx.index(name))
    if m == 'start':
        print(f"Number of {level} questions: {number}")
    elif m == 'T':
        print(f"  Correct: {number}")
    elif m == 'F':
        print(f"  Wrong: {number}")
    elif m == 'S':
        print(f"  Slow: {number}")

# Drop every event whose marker is not one of the interesting ones, in a
# single pass; row order is preserved.
events = events[np.isin(events[:, 2], interested_markers)]

In [None]:
# Keep only the events that delimit a block: the first event of every run of
# identical '...,start' markers (the beginning of the block) and every
# fixation marker.
Low_start = marker_idx.index('math,LowStress,start')
Mil_start = marker_idx.index('math,MildStress,start')
Hig_start = marker_idx.index('math,HigherStress,start')
fixation = marker_idx.index('fixation,None,None')
block_starts = (Low_start, Mil_start, Hig_start)

interested_events = []
previous_marker = None  # marker of the event handled in the previous iteration
for event in events:
    marker = event[2]
    # A start marker opens a block only when the preceding event carried a
    # different marker; repeats of the same start marker are skipped.
    if marker in block_starts and marker != previous_marker:
        interested_events.append(list(event))
    if marker == fixation:
        interested_events.append(list(event))
    previous_marker = marker
interested_events = np.array(interested_events)
interested_events

In [None]:
# Split every block into fixed-length chunks and emit one event per chunk.
# `interested_events` is consumed as alternating (block start, fixation) rows;
# each chunk event is [sample index, 0, marker of the block's start event].
chunck_size = 5 # second
block_duration = 30 # second
# Sample counts are derived from `sampling_rate` instead of a hard-coded 250.
chunck_samples = int(chunck_size * sampling_rate)
block_samples = int(block_duration * sampling_rate)

low_marker = marker_idx.index('math,LowStress,start')
mil_marker = marker_idx.index('math,MildStress,start')
hig_marker = marker_idx.index('math,HigherStress,start')
fixation_marker = marker_idx.index('fixation,None,None')
# Only the three block-start markers may open a pair. `interested_markers`
# also contains the fixation marker, so it must not be used for this check.
start_markers = (low_marker, mil_marker, hig_marker)

m_start = interested_events[0::2]
m_stop = interested_events[1::2]
chunck_events = []
for index, (start, stop) in enumerate(zip(m_start, m_stop)):
    # [time_stamp, _, marker]
    a = start[2] in start_markers
    b = stop[2] == fixation_marker
    if not (a and b):
        raise ValueError(f"at {index}, found unexpected marker order. start='{marker_idx[start[2]]}' stop='{marker_idx[stop[2]]}'")
    # The "+ 1" makes the range include the chunk that begins exactly at
    # block_duration seconds after the block start.
    for i in range(start[0], start[0] + block_samples + 1, chunck_samples):
        # A chunk must end before the fixation marker that closes the block.
        if i + chunck_samples >= stop[0]:
            raise ValueError("start stop is incorrect.")
        chunck_events.append([i, 0, start[2]])

if not chunck_events:
    # Fail with a clear message rather than an IndexError on the empty array.
    raise ValueError("No chunk events were produced; interested_events holds no (start, fixation) pair.")

chunck_events = np.array(chunck_events)
num_low = np.count_nonzero(chunck_events[:, 2] == low_marker)
num_mil = np.count_nonzero(chunck_events[:, 2] == mil_marker)
num_hig = np.count_nonzero(chunck_events[:, 2] == hig_marker)
if num_low != num_mil or num_mil != num_hig:
    raise ValueError(f"The number of chuncked events is not equal. LowStress={num_low} MildStress={num_mil} HigherStress={num_hig}")
print(f"Number of samples of each block: {num_low}")
num_chunck = num_low


In [None]:
# Cut one epoch per chunk event out of `raw`. The window runs from t_start to
# t_stop seconds relative to the chunk event, and the same interval is passed
# as the baseline.
epoch_window = (0.3, 4.8)  # seconds
t_start, t_stop = epoch_window
epochs = mne.Epochs(
    raw,
    events=chunck_events,
    tmin=t_start,
    tmax=t_stop,
    baseline=(t_start, t_stop),
    verbose=False,
)

In [None]:
# Frequency bands (Hz):
#   0-4     Delta
#   4-8     Theta
#   8-16    Alpha ***
#   16-32   Beta
#   32-64   Noisy Gamma
#   64-128  Noisy Signal
# frequencies = np.arange(8, 32+1, 8)
frequencies = np.array([10, 11, 12, 13, 14])

# One scalar feature per epoch: the Morlet time-frequency result at the
# frequencies above, averaged over channels and then over everything left.
# `labels` collects the matching evoked.comment of every epoch.
features = []
labels = []
for evoked in epochs.iter_evoked():
    tfr_obj = mne.time_frequency.tfr_morlet(
        evoked,
        freqs=frequencies,
        n_cycles=3,
        return_itc=False,
        decim=3,
    )
    # tfr_obj.data is (channel, freq, time-window); the channel mean leaves
    # (freq, time-window), and the second mean collapses that to one number.
    alpha_mean = tfr_obj.data.mean(axis=0).mean()
    features.append(alpha_mean)
    labels.append(evoked.comment)
# features = np.array(features)
# features.shape

In [None]:
# Scatter-plot the max-normalised feature of every epoch, one colour per
# epoch label.
from sklearn.preprocessing import normalize
f_norm = normalize(np.array(features).reshape(-1,1), axis=0, norm='max')
# f_norm = np.array(features).reshape(-1,1)

# Group the points by the label collected with each feature instead of
# slicing by position. Positional slices of length num_chunck are only correct
# when the epochs arrive sorted by stress level and no epoch was dropped.
# Labels are visited in order of first appearance, so the colours match the
# positional plot whenever that assumption does hold.
epoch_labels = np.array(labels)
for label in dict.fromkeys(labels):
    positions = np.flatnonzero(epoch_labels == label)
    plt.scatter(positions, f_norm[positions, 0], label=str(label))
plt.legend(title="epoch label")