# Prepare EEG data for training of machine-learning models
+ Import data.
+ Apply filters (bandpass).
+ Detect potential bad channels and replace them by interpolation.
+ Detect potential bad epochs and remove them.

## Import packages & links

In [1]:
# Import packages
import os
import sys
import csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
sys.path.insert(0, os.path.dirname(os.getcwd()))

import mne
#%matplotlib inline
#from mayavi import mlab

In [2]:
from config import ROOT, PATH_CODE, PATH_DATA, PATH_OUTPUT, PATH_METADATA
PATH_CNTS = os.path.join(PATH_DATA, "35mnd mmn")

In [3]:
PATH_CNTS

'C:\\OneDrive - Netherlands eScience Center\\Project_ePodium\\Data\\EEGdata_Karin_Jan2020\\35mnd mmn'

In [4]:
filename_labels = PATH_METADATA + "Screening_children5a_summary_new.txt" 
metadata = pd.read_csv(filename_labels, sep='\t')
metadata.head()

Unnamed: 0,id_child,groupDDP,atRiskOrNotDDP,dyslexicAtMidGroup3DDP,assignment1,assignment2,assignment3,assignment4,childInfoPresent,relativeInfoPresent,mmr_2mth,mmr_5mth,mmr_11mth,mmr_17mth,mmr_23mth,mmr_29mth,mmr_35mth,mmr_41mth,mmr_47mth
0,1,4,unclear,1,notEnoughInfo,notEnoughInfo,notEnoughInfo,notEnoughInfo,1,1,1,0,1,1,1,1,1,1,1
1,2,missing,missing,missing,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,0,1,0,0,0,0,1,1,1,0,0
2,3,3Ctrl,notAtRisk,0,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,1,1,1,0,1,0,1,0,1,1,0
3,4,missing,missing,missing,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,1,1,1,1,1,0,1,1,0,1,0
4,5,3Ctrl,notAtRisk,0,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,1,1,1,0,1,1,0,1,1,1,0


In [5]:
metadata.shape

(336, 19)

## Search all *.cnt files and check for how many we have a label

In [6]:
import fnmatch
import warnings
warnings.filterwarnings('ignore')

import helper_functions

dirs = os.listdir(PATH_CNTS)
cnt_files = fnmatch.filter(dirs, "*.cnt")

In [7]:
found_ids = [x[:3] for x in cnt_files]
idx = np.where(np.array(found_ids) == '036')[0]
[cnt_files[x] for x in idx]

[]

In [8]:
metadata[metadata['id_child'] == '036']['groupDDP'].values[0]

'3Ctrl'

In [9]:
labels = []
np.random.seed(0)

found_ids = [x[:3] for x in cnt_files]
for ID in list(set(found_ids)): 
    idx = np.where(np.array(found_ids) == ID)[0]
    filenames = [cnt_files[x] for x in idx]
    label = metadata[metadata['id_child'] == ID]['groupDDP'].values[0]
    label_risk = metadata[metadata['id_child'] == ID]['assignment4'].values[0]
    if label == '1FRdys':
        label = 1
    elif label == '2FRndys':
        label = 0
    elif label == '3Ctrl': #TODO: check if this is correct!
        label = 0
    labels.append([ID, label, label_risk, filenames])

In [10]:
labels[:10]

[['177',
  0,
  'notAtRisk_rest',
  ['177_35_jr_mmn36_2_wk.cnt', '177_35_jr_mmn36_wk.cnt']],
 ['162', 0, 'notEnoughInfo', ['162_35_mr_mmn36_wk.cnt']],
 ['348',
  0,
  'notAtRisk_highestScores',
  ['348_35_jc_mmn25_wk.cnt', '348_35_jc_mmn6_wk.cnt']],
 ['009', 0, 'notAtRisk_rest', ['009_35_jc_mmn36_wk.cnt']],
 ['486', 0, 'atRisk', ['486_35_jd_mmn25_wk.cnt', '486_35_jd_mmn36_wk.cnt']],
 ['313',
  0,
  'notAtRisk_rest',
  ['313_35_mc_mmn25_wk.cnt', '313_35_mc_mmn36_wk.cnt']],
 ['428',
  'missing',
  'atRisk',
  ['428_35_md_mmn25_wk.cnt', '428_35_md_mmn36_wk.cnt']],
 ['480', 0, 'atRisk', ['480_35_jd_mmn25_wk.cnt', '480_35_jd_mmn36_wk.cnt']],
 ['141', 0, 'missing', ['141_35_jr_mmn36_wk.cnt']],
 ['422', 0, 'notEnoughInfo', ['422_35_jd_mmn36_wk.cnt']]]

In [11]:
len(labels), len(list(set(found_ids)))

(162, 162)

### Count number (and type) of labels found:

In [12]:
labels_known = 0
labels_unknown = 0
labels_type = []

for x in labels:
    if x[1] == 1: #'dyslexic'
        labels_known += 1
        labels_type.append(1)
    elif x[1] == 0: #'non-dyslexic'
        labels_known += 1
        labels_type.append(0)
    else: # missing or unclear
        labels_unknown += 1  
        labels_type.append('missing')
        
print("Data with proper labels:", labels_known, "||| Data without proper label:", labels_unknown)     

Data with proper labels: 136 ||| Data without proper label: 26


In [13]:
print("Data for 'dyslexic':", labels_type.count(1))
print("Data for 'non-dyslexic':", labels_type.count(0))

Data for 'dyslexic': 35
Data for 'non-dyslexic': 101


In [14]:
# Check types of risk group labels found
labels_risktype = [x[2] for x in labels]
list(set(labels_risktype))

['missing',
 'notEnoughInfo',
 'notAtRisk_rest',
 'notAtRisk_highestScores',
 'atRisk']

In [15]:
metadata['atRiskOrNotDDP'][:10]

0      unclear
1      missing
2    notAtRisk
3      missing
4    notAtRisk
5    notAtRisk
6    notAtRisk
7    notAtRisk
8    notAtRisk
9      unclear
Name: atRiskOrNotDDP, dtype: object

In [16]:
group_notrisk = np.array(1*((metadata['atRiskOrNotDDP'] == 'notAtRisk')
                   | (metadata['assignment4'].isin(['notAtRisk_rest', 'notAtRisk_highestScores']))))

group_risk = np.array(1*((metadata['atRiskOrNotDDP'] == 'atRisk')
                   | (metadata['assignment4'] == 'at risk')))

In [17]:
np.sum(group_risk) + np.sum(group_notrisk)

307

In [18]:
label_risk = group_notrisk + 2*group_risk 
label_risk[label_risk == 3] = 2
label_risk = label_risk -1

In [19]:
label_risk

array([-1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0, -1,  0,  0,  0,  0, -1,  0,  0,  0,
        0,  0, -1, -1,  1, -1,  1,  0, -1,  0,  1,  1,  1, -1, -1,  0,  1,
        1,  0,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1,
        1,  1,  0,  1,  1,  1,  1,  1,  0, -1,  1,  1,  1,  1,  1, -1,  1,
        1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1, -1,  1,  1,  1,  1,
        1,  1, -1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  0,
        0, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  0,  0,  0,  0,  0,  0, -1,
        0,  0,  0,  0,  0,  0, -1, -1,  1,  1, -1,  0, -1,  1,  1,  1,  1,
        1,  1,  0,  1,  1,  1,  1, -1, -1, -1,  1,  1,  1,  0,  1,  1,  1,
        1,  1,  1,  1, -1,  1,  1,  0, -1, -1,  1,  1,  1,  0,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,
        1,  1,  1,  1,  1

In [20]:
group_notdys = np.array(1*(metadata['groupDDP'].isin(['1FRdys', '3Ctrl'])))

group_dys = np.array(1*(metadata['groupDDP'] == '2FRndys'))

In [21]:
np.sum(group_notdys) + np.sum(group_dys)

252

In [22]:
label_dys = group_notdys + 2*group_dys 
label_dys[label_dys == 3] = 2
label_dys = label_dys -1

In [23]:
label_dys

array([-1, -1,  0, -1,  0,  0,  0,  0,  0, -1, -1, -1, -1,  0,  0,  0,  0,
        0,  0,  0, -1,  0,  0,  0,  0, -1,  0,  0,  0, -1, -1,  0,  0,  0,
        0,  0, -1, -1,  1, -1,  1, -1, -1, -1,  1,  1,  0, -1, -1, -1,  0,
        0, -1,  1,  1,  0,  0,  1,  1, -1,  0,  0,  1,  1, -1, -1,  1, -1,
        0,  1, -1,  1,  1,  0,  1,  0, -1, -1,  0,  1,  1,  0,  1, -1,  1,
        1,  1,  0,  1,  0,  1,  0,  1, -1,  1,  0,  1, -1,  0,  1,  0,  0,
        1,  0, -1,  0,  0, -1,  0,  1,  1,  1,  1,  1,  1,  0,  0, -1, -1,
        0, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  0,  0,  0,  0,
        0,  0,  0, -1,  0,  0,  0, -1, -1, -1,  0,  0,  0,  0,  0,  0, -1,
       -1,  0,  0,  0,  0,  0, -1, -1,  1,  0, -1,  0, -1,  0,  0,  0,  0,
        0,  0, -1,  1,  1,  1,  1, -1, -1, -1,  1,  0,  1, -1,  1,  0,  0,
        1,  1,  0,  1, -1,  1,  0, -1, -1, -1,  1,  0,  1,  0,  1,  0,  1,
        1,  1,  1,  0,  0,  1,  0,  1,  1,  1,  0,  1, -1,  1,  1,  1,  0,
        1,  1,  0,  1,  1

## create Dataframe with labels to be used

In [24]:
labels_final = pd.DataFrame(data=metadata['id_child'].values, columns=['id_child'])
labels_final['label_dys'] = label_dys
labels_final['label_risk'] = label_risk
labels_final.head()

Unnamed: 0,id_child,label_dys,label_risk
0,1,-1,-1
1,2,-1,0
2,3,0,0
3,4,-1,0
4,5,0,0


In [25]:
print("Data for 'at risk':", labels_risktype.count('atRisk'))
print("Data for 'notAtRisk_rest':", labels_risktype.count('notAtRisk_rest'))
print("Data for 'notAtRisk_highestScores':", labels_risktype.count('notAtRisk_highestScores'))

Data for 'at risk': 52
Data for 'notAtRisk_rest': 36
Data for 'notAtRisk_highestScores': 27


In [26]:
labels_risktype = [x[2] for x in labels if x[1] in [1,0]]
print("Data for 'at risk':", labels_risktype.count('atRisk'))
print("Data for 'notAtRisk_rest':", labels_risktype.count('notAtRisk_rest'))
print("Data for 'notAtRisk_highestScores':", labels_risktype.count('notAtRisk_highestScores'))

Data for 'at risk': 42
Data for 'notAtRisk_rest': 31
Data for 'notAtRisk_highestScores': 23


In [27]:
metadata.loc[(metadata['groupDDP'].isin(['1FRdys', '2FRndys', '3Ctrl']) 
              & metadata['assignment4'].isin(['at risk', 'notAtRisk_rest', 'notAtRisk_highestScores', ]))]

Unnamed: 0,id_child,groupDDP,atRiskOrNotDDP,dyslexicAtMidGroup3DDP,assignment1,assignment2,assignment3,assignment4,childInfoPresent,relativeInfoPresent,mmr_2mth,mmr_5mth,mmr_11mth,mmr_17mth,mmr_23mth,mmr_29mth,mmr_35mth,mmr_41mth,mmr_47mth
2,003,3Ctrl,notAtRisk,0,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,1,1,1,0,1,0,1,0,1,1,0
4,005,3Ctrl,notAtRisk,0,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,1,1,1,0,1,1,0,1,1,1,0
5,006,3Ctrl,notAtRisk,0,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,1,1,1,0,0,1,1,1,0,1,0
8,009,3Ctrl,notAtRisk,1,notEnoughInfo,notEnoughInfo,notEnoughInfo,notAtRisk_rest,1,1,1,0,1,1,1,1,1,1,0
15,016,3Ctrl,notAtRisk,0,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,1,1,1,1,1,0,0,0,0,0,0
16,017,3Ctrl,notAtRisk,0,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,1,1,1,0,1,1,1,0,0,1,0
17,018,3Ctrl,notAtRisk,0,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,1,1,1,0,1,1,1,1,1,1,0
19,021,3Ctrl,notAtRisk,0,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,1,1,1,0,1,1,1,1,1,1,0
21,023,3Ctrl,notAtRisk,0,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,1,1,1,0,1,0,1,1,1,1,0
22,024,3Ctrl,notAtRisk,0,notAtRisk_rest,notAtRisk_rest,notAtRisk_rest,notAtRisk_rest,1,1,1,0,1,0,1,1,0,1,0


# Workflow data processing
1. Load cnt files.
2. Select same number of channels (here: 30 same channels which exist for both 30 and 62 channel data)
3. Preprocess raw data (bandpass + detect outliers and 'bad' epochs).
4. Store epoch data and event type as array

## LABELS:
+ After Karin's search we have proper labels for much more files!  


In [28]:
from importing import read_cnt_file, standardize_EEG

In [32]:
# Initialize array
signal_collection = np.zeros((0,30,501)) #62
label_collection = []
ID_collection = []
metadata_collection = []

collect_in_one_array = False

for i, filename in enumerate(cnt_files):
    
    # First check if we have proper label for that file
    # -----------------------------------------------------------
    
    ID = filename[:3]
    label = labels_final[labels_final['id_child'] == ID]['label_dys'].values[0]
    label_risk = labels_final[labels_final['id_child'] == ID]['label_risk'].values[0]
    #label = metadata[metadata['id_child'] == ID]['groupDDP'].values[0]
    #label_risk = metadata[metadata['id_child'] == ID]['assignment4'].values[0]
    
    if (label < 0) or (label_risk < 0):
        print("No proper label found for file: ", filename)
    else:
        #label_group = int(metadata[metadata["file"].str.match(filename[:-4])]['group'])
        label_group = 'dys' + str(label) + '_risk' + str(label_risk)
        
        print(40*"=")
        print("Importing file: ",filename)
        print("Data belongs into group: ", label_group)

        # Import data and events
        file = os.path.join(PATH_CNTS, filename)

        signal_collect, label_collect, ch_names = read_cnt_file(file, 
                                                                  label_group,
                                                                  event_idx = [3, 13, 66],
                                                                  channel_set = "30",
                                                                  tmin = -0.2,
                                                                  tmax = 0.8,
                                                                  lpass = 0.5, 
                                                                  hpass = 40, 
                                                                  threshold = 5, 
                                                                  max_bad_fraction = 0.2)
        
        
        # Standardize data
        # --------------------------------------------------------
        if signal_collect is not None:
            signal_collect = standardize_EEG(signal_collect,
                                 std_aim = 1,                   
                                 centering = 'per_channel',
                                 scaling = 'global')
        
        # Save data and labels
        # ---------------------------------------------------------
        if signal_collect is not None:
            
            if collect_in_one_array:

                # Get signals as array and add to total collection
                print(signal_collect.shape, len(label_collect))
                signal_collection = np.concatenate((signal_collection, signal_collect), axis=0)
                label_collection += label_collect

            else:
                if len(label_collect) > 1:
                #if label_collect is not None:
                    file = os.path.join(PATH_OUTPUT, "processed_data_" + filename[:-4] + ".npy")
                    np.save(file, signal_collect)

                    #filename = os.path.join(PATH_OUTPUT, "EEG_data_30ch_1s_corrected_metadata_ID"+ ID + ".csv")
                    file = os.path.join(PATH_OUTPUT, "processed_data_" + filename[:-4] + ".csv")

                    with open(file, 'w', newline='') as csvFile:
                        writer = csv.writer(csvFile)
                        writer.writerow(label_collect)
                    csvFile.close()
            
            ID_collection += [ID] * len(label_collect) 
            metadata_collection.append((i, filename, signal_collection.shape[0], ch_names))

No proper label found for file:  001_35_jc_mmn36_wk.cnt
No proper label found for file:  002_35_jc_mmn36_wk.cnt
Importing file:  003_35_jc_mmn36_wk.cnt
Data belongs into group:  dys0_risk0
Reading 0 ... 742979  =      0.000 ...  1485.958 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['0', '13', '3', '66']
Found 26 bad epochs in a total of 11  channels.
Marked 26 bad epochs in a total of 800  epochs.
Found 7 bad epochs in a total of 6  ch

- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['0', '13', '3', '66']
Found 910 bad epochs in a total of 29  channels.
Found bad channel (more than 289.6  bad epochs): Channel no:  9
Found bad channel (more than 289.6  bad epochs): Channel no:  6
Marked 614 bad epochs in a total of 1448  epochs.
Found 118 bad epochs in a total of 22  channels.
Found bad channel (more than 36.2  bad epochs): Channel no:  9
Found bad channel (more than 36.2  bad epochs): Channel no:  6
Marked 94 bad epochs in a total of 181  epochs.
Found 109 bad epochs in a total of 24  channels.
Found bad channel (more than 36.2  bad epochs): Channel no:  9
Found bad cha


Used Annotations descriptions: ['0', '13', '3', '4', '66']
Found 145 bad epochs in a total of 21  channels.
Marked 145 bad epochs in a total of 800  epochs.
Found 22 bad epochs in a total of 11  channels.
Marked 22 bad epochs in a total of 100  epochs.
Found 21 bad epochs in a total of 11  channels.
Marked 21 bad epochs in a total of 100  epochs.
Importing file:  030_35_jc_mmn36_wk.cnt
Data belongs into group:  dys0_risk0
Reading 0 ... 749539  =      0.000 ...  1499.078 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.

- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['12', '2', '55']
Importing file:  104_35_jr_mmn36_wk.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 371319  =      0.000 ...   742.638 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Us

Importing file:  114_35_jr_mmn36_2_wk.cnt
Data belongs into group:  dys0_risk1
Reading 0 ... 375299  =      0.000 ...   750.598 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '66']
Found 13 bad epochs in a total of 16  channels.
Marked 13 bad epochs in a total of 400  epochs.
Found 1 bad epochs in a total of 1  channels.
Marked 1 bad epochs in a total of 50  epochs.
Found 2 bad epochs in a total of 3  channels.
Marked 2 bad ep

Found 6 bad epochs in a total of 8  channels.
Marked 6 bad epochs in a total of 100  epochs.
Found 4 bad epochs in a total of 5  channels.
Marked 4 bad epochs in a total of 100  epochs.
Importing file:  124_35_jr_mmn25_wk.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 369079  =      0.000 ...   738.158 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['12', '2', '55']
Importing file:  124_35_jr_mmn36_wk.cnt
Data belongs into group: 

Found 1 bad epochs in a total of 1  channels.
Marked 1 bad epochs in a total of 100  epochs.
No outliers found with given threshold.
No proper label found for file:  137_35_mr_mmn36_2_wk.cnt
No proper label found for file:  137_35_mr_mmn36_wk.cnt
Importing file:  138_35_jr_mmn36_wk.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 750799  =      0.000 ...  1501.598 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '66']
Foun

- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '66']
Found 4 bad epochs in a total of 6  channels.
Marked 4 bad epochs in a total of 541  epochs.
Found 2 bad epochs in a total of 2  channels.
Marked 2 bad epochs in a total of 67  epochs.
No outliers found with given threshold.
Importing file:  147_35_jd_mmn36.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 448539  =      0.000 ...   897.078 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 

- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['0', '13', '3', '66']
Found 100 bad epochs in a total of 24  channels.
Marked 100 bad epochs in a total of 800  epochs.
Found 12 bad epochs in a total of 13  channels.
Marked 12 bad epochs in a total of 100  epochs.
Found 12 bad epochs in a total of 10  channels.
Marked 12 bad epochs in a total of 100  epochs.
Importing file:  153_35_mr_mmn36_wk.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 743519  =      0.000 ...  1487.038 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth

- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '66']
Found 6 bad epochs in a total of 30  channels.
Marked 6 bad epochs in a total of 800  epochs.
No outliers found with given threshold.
No outliers found with given threshold.
Importing file:  163_35_jr_mmn36_wk.cnt
Data belongs into group:  dys0_risk1
Reading 0 ... 749099  =      0.000 ...  1498.198 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 pa

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '66']
Found 5 bad epochs in a total of 3  channels.
Marked 5 bad epochs in a total of 800  epochs.
No outliers found with given threshold.
No outliers found with given threshold.
Importing file:  176_35_mr_mmn36_wk.cnt
Data belongs into group:  dys0_risk1
Reading 0 ... 757179  =      0.000 ...  1514.358 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.

- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '66']
Found 192 bad epochs in a total of 24  channels.
Marked 192 bad epochs in a total of 800  epochs.
Found 27 bad epochs in a total of 14  channels.
Marked 27 bad epochs in a total of 100  epochs.
Found 25 bad epochs in a total of 18  channels.
Marked 25 bad epochs in a total of 100  epochs.
Importing file:  181_35_jr_mmn36_wk.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 739639  =      0.000 ...  1479.278 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00

- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['0', '12', '2', '55']
Importing file:  309_35_jc_mmn36_wk.cnt
Data belongs into group:  dys0_risk0
Reading 0 ... 723579  =      0.000 ...  1447.158 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['0', '13', '3', '66']
Found 2 bad epochs in a total of 1  channels.
Marked 2 bad epochs in a total of 800  epochs.
Found 1 bad epochs in a total of 2  channels.
Marked 1 b


FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['0', '13', '3', '66']
Found 5 bad epochs in a total of 4  channels.
Marked 5 bad epochs in a total of 800  epochs.
No outliers found with given threshold.
Found 1 bad epochs in a total of 1  channels.
Marked 1 bad epochs in a total of 100  epochs.
Importing file:  314_35_mc_mmn25_wk.cnt
Data belongs into group:  dys0_risk0
Reading 0 ... 726419  =      0.000 ...  1452.838 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter paramete

Found 2 bad epochs in a total of 27  channels.
Marked 2 bad epochs in a total of 100  epochs.
Found 1 bad epochs in a total of 2  channels.
Marked 1 bad epochs in a total of 100  epochs.
Importing file:  321_35_mc_mmn36_wk.cnt
Data belongs into group:  dys0_risk0
Reading 0 ... 725799  =      0.000 ...  1451.598 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '66']
Found 53 bad epochs in a total of 14  channels.
Marked 53 bad ep

- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['12', '2', '55']
Importing file:  334_35_mc_mmn36_wk.cnt
Data belongs into group:  dys0_risk0
Reading 0 ... 727399  =      0.000 ...  1454.798 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '66']
Found 19 bad epochs in a total of 30  channels.
Marked 19 bad epochs in a total of 800  epochs.
Found 6 bad epochs in a total of 6  channels.
Marked 6 bad epoc

Found 2 bad epochs in a total of 3  channels.
Marked 2 bad epochs in a total of 100  epochs.
Importing file:  348_35_jc_mmn25_wk.cnt
Data belongs into group:  dys0_risk0
Reading 0 ... 726839  =      0.000 ...  1453.678 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['12', '2', '55']
Importing file:  348_35_jc_mmn6_wk.cnt
Data belongs into group:  dys0_risk0
Reading 0 ... 726339  =      0.000 ...  1452.678 secs...
Filtering raw data in 1 c

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['12', '2', '55']
Importing file:  412_35_jd_mmn36_wk.cnt
Data belongs into group:  dys0_risk1
Reading 0 ... 728319  =      0.000 ...  1456.638 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming 

Found 5 bad epochs in a total of 13  channels.
Marked 5 bad epochs in a total of 100  epochs.
Found 5 bad epochs in a total of 16  channels.
Marked 5 bad epochs in a total of 100  epochs.
Importing file:  424_35_jd_mmn25_wk_.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 736299  =      0.000 ...  1472.598 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['0', '12', '2', '55']
Importing file:  424_35_jd_mmn36_wk.cnt
Data belongs into


FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['0', '12', '2', '55']
Importing file:  434_35_jd_mmn36_wk.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 725559  =      0.000 ...  1451.118 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband 

- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['12', '2', '55']
Importing file:  441_35_md_mmn36_wk.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 725339  =      0.000 ...  1450.678 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '66']
Found 48 bad epochs in a total of 14  channels.
Marked 48 bad epochs in a total of 800  epochs.
Found 3 bad epochs in a total of 5  channels.
Marked 3 bad epoc

- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['0', '12', '2', '55']
Importing file:  456_35_md_mmn36_wk.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 733879  =      0.000 ...  1467.758 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 4

- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '66']
Found 32 bad epochs in a total of 30  channels.
Marked 32 bad epochs in a total of 800  epochs.
Found 6 bad epochs in a total of 5  channels.
Marked 6 bad epochs in a total of 100  epochs.
Found 5 bad epochs in a total of 7  channels.
Marked 5 bad epochs in a total of 100  epochs.
Importing file:  478_35_jd_mmn25_wk.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 729559  =      0.000 ...  1459.118 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple 

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['0', '12', '2', '55']
Importing file:  485_35_md_mmn36_wk.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 730519  =      0.000 ...  1461.038 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Ham


FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '66']
Found 16 bad epochs in a total of 30  channels.
Marked 16 bad epochs in a total of 800  epochs.
Found 3 bad epochs in a total of 2  channels.
Marked 3 bad epochs in a total of 100  epochs.
Found 3 bad epochs in a total of 3  channels.
Marked 3 bad epochs in a total of 100  epochs.
Importing file:  491_35_jd_mmn36_wk.cnt
Data belongs into group:  dys0_risk1
Reading 0 ... 728879  =      0.000 ...  1457.758 secs...
Filtering raw data in 1 contiguous segment
Setting up ban

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['0', '13', '3', '66']
Found 21 bad epochs in a total of 11  channels.
Marked 21 bad epochs in a total of 800  epochs.
Found 1 bad epochs in a total of 1  channels.
Marked 1 bad epochs in a total of 100  epochs.
Found 6 bad epochs in a total of 7  channels.
Marked 6 bad epochs in a total of 100  epochs.
Importing file:  601-109-35m-jc-mmn36-slp.cnt
Data belongs into group:  dys0_risk0
Reading 0 ... 7

In [52]:
label_collect[:10]

['3dys1_risk1',
 '3dys1_risk1',
 '3dys1_risk1',
 '3dys1_risk1',
 '3dys1_risk1',
 '3dys1_risk1',
 '3dys1_risk1',
 '3dys1_risk1',
 '3dys1_risk1',
 '3dys1_risk1']

# Save entire processed dataset:
--> for this: run above code with ``collect_in_one_array = True``

In [75]:
filename = os.path.join(PATH_OUTPUT, "EEG_data_30channels_1s_corrected.npy")
np.save(filename, signal_collection)

filename = os.path.join(PATH_OUTPUT, "EEG_data_30channels_1s_corrected_labels.npy")
np.save(filename, label_collection)

import csv
filename = os.path.join(PATH_OUTPUT, "EEG_data_30channels_1s_corrected_metadata.csv")

with open(filename, 'w') as csvFile:
    writer = csv.writer(csvFile)
    writer.writerows(metadata_collection)
csvFile.close()