# Prepare EEG data for training of machine-learning models
+ Import data.
+ Apply filters (bandpass).
+ Detect potential bad channels and replace them by interpolation.
+ Detect potential bad epochs and remove them.

## Import packages & links

In [1]:
# Import packages
import os
import sys
import csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
sys.path.insert(0, os.path.dirname(os.getcwd()))

import mne
#%matplotlib inline
#from mayavi import mlab

In [10]:
from config import ROOT, PATH_CODE, PATH_DATA, PATH_OUTPUT, PATH_METADATA


### Update path!

In [37]:
PATH_CNTS = os.path.join(PATH_DATA, "41mnd mmn")
PATH_OUTPUT = os.path.join(PATH_DATA, 'processed_data_41mnd')

In [11]:
filename_labels = PATH_METADATA + "Screening_children5a_summary_new.txt" 
metadata = pd.read_csv(filename_labels, sep='\t')
metadata.head()

Unnamed: 0,id_child,groupDDP,atRiskOrNotDDP,dyslexicAtMidGroup3DDP,assignment1,assignment2,assignment3,assignment4,childInfoPresent,relativeInfoPresent,mmr_2mth,mmr_5mth,mmr_11mth,mmr_17mth,mmr_23mth,mmr_29mth,mmr_35mth,mmr_41mth,mmr_47mth
0,1,4,unclear,1,notEnoughInfo,notEnoughInfo,notEnoughInfo,notEnoughInfo,1,1,1,0,1,1,1,1,1,1,1
1,2,missing,missing,missing,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,0,1,0,0,0,0,1,1,1,0,0
2,3,3Ctrl,notAtRisk,0,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,1,1,1,0,1,0,1,0,1,1,0
3,4,missing,missing,missing,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,1,1,1,1,1,0,1,1,0,1,0
4,5,3Ctrl,notAtRisk,0,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,1,1,1,0,1,1,0,1,1,1,0


In [12]:
metadata.shape

(336, 19)

## Search all *.cnt files and check for how many we have a label

In [13]:
import fnmatch
import warnings
warnings.filterwarnings('ignore')

import helper_functions

dirs = os.listdir(PATH_CNTS)
cnt_files = fnmatch.filter(dirs, "*.cnt")

In [14]:
found_ids = [x[:3] for x in cnt_files]
idx = np.where(np.array(found_ids) == '036')[0]
[cnt_files[x] for x in idx]

[]

In [15]:
metadata[metadata['id_child'] == '036']['groupDDP'].values[0]

'3Ctrl'

In [16]:
labels = []

found_ids = [x[:3] for x in cnt_files]
for ID in list(set(found_ids)): 
    idx = np.where(np.array(found_ids) == ID)[0]
    filenames = [cnt_files[x] for x in idx]
    label = metadata[metadata['id_child'] == ID]['groupDDP'].values[0]
    label_risk = metadata[metadata['id_child'] == ID]['assignment4'].values[0]
    if label == '1FRdys':
        label = 1
    elif label == '2FRndys':
        label = 0
    elif label == '3Ctrl': #TODO: check if this is correct!
        label = 0
    labels.append([ID, label, label_risk, filenames])

In [8]:
labels[:10]

[['304',
  'missing',
  'notAtRisk_highestScores',
  ['304_17_mc_mmn_25_wk.cnt', '304_17_mc_mmn_36_wk.cnt']],
 ['181',
  0,
  'notAtRisk_rest',
  ['181_17_jr_mmn25_slp.cnt',
   '181_17_jr_mmn36.cnt',
   '181_17_jr_mmn36_2.cnt',
   '181_17_jr_mmn36_slp.cnt']],
 ['323',
  0,
  'notAtRisk_highestScores',
  ['323_17_mc_mmn25_2_slp.cnt',
   '323_17_mc_mmn25_wk.cnt',
   '323_17_mc_mmn36_wk.cnt']],
 ['030',
  0,
  'notEnoughInfo',
  ['030_17_jc_bakdaksw.cnt',
   '030_17_jc_mmn25_wk.cnt',
   '030_17_jc_mmn36_wk.cnt',
   '030_17_jc_mmn36_wk_2.cnt']],
 ['017',
  0,
  'notAtRisk_highestScores',
  ['017_17_jc_mmn25_wk.cnt', '017_17_jc_mmn36.cnt']],
 ['021',
  0,
  'notAtRisk_highestScores',
  ['021_17_jc_mmn25_wk.cnt', '021_17_jc_mmn36_wk_2.cnt']],
 ['140',
  1,
  'notEnoughInfo',
  ['140_17_mr_mmn25_wk.cnt',
   '140_17_mr_mmn36_2.cnt',
   '140_17_mr_mmn36_wk.cnt',
   '140_17_mr_mmn47.cnt']],
 ['335',
  0,
  'notAtRisk_highestScores',
  ['335_17_mc_mmn25_wk.cnt', '335_17_mc_mmn36_wk.cnt']],
 ['138

In [17]:
len(labels), len(list(set(found_ids)))

(245, 245)

### Count number (and type) of labels found:

In [18]:
labels_known = 0
labels_unknown = 0
labels_type = []

for x in labels:
    if x[1] == 1: #'dyslexic'
        labels_known += 1
        labels_type.append(1)
    elif x[1] == 0: #'non-dyslexic'
        labels_known += 1
        labels_type.append(0)
    else: # missing or unclear
        labels_unknown += 1  
        labels_type.append('missing')
        
print("Data with proper labels:", labels_known, "||| Data without proper label:", labels_unknown)     

Data with proper labels: 210 ||| Data without proper label: 35


In [19]:
print("Data for 'dyslexic':", labels_type.count(1))
print("Data for 'non-dyslexic':", labels_type.count(0))

Data for 'dyslexic': 52
Data for 'non-dyslexic': 158


In [20]:
# Check types of risk group labels found
labels_risktype = [x[2] for x in labels]
list(set(labels_risktype))

['atRisk',
 'notAtRisk_rest',
 'notAtRisk_highestScores',
 'missing',
 'notEnoughInfo']

In [21]:
metadata['atRiskOrNotDDP'][:10]

0      unclear
1      missing
2    notAtRisk
3      missing
4    notAtRisk
5    notAtRisk
6    notAtRisk
7    notAtRisk
8    notAtRisk
9      unclear
Name: atRiskOrNotDDP, dtype: object

In [22]:
group_notrisk = np.array(1*((metadata['atRiskOrNotDDP'] == 'notAtRisk')
                   | (metadata['assignment4'].isin(['notAtRisk_rest', 'notAtRisk_highestScores']))))

group_risk = np.array(1*((metadata['atRiskOrNotDDP'] == 'atRisk')
                   | (metadata['assignment4'] == 'at risk')))

In [23]:
np.sum(group_risk) + np.sum(group_notrisk)

307

In [24]:
label_risk = group_notrisk + 2*group_risk 
label_risk[label_risk == 3] = 2
label_risk = label_risk -1

In [25]:
label_risk

array([-1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0, -1,  0,  0,  0,  0, -1,  0,  0,  0,
        0,  0, -1, -1,  1, -1,  1,  0, -1,  0,  1,  1,  1, -1, -1,  0,  1,
        1,  0,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1,
        1,  1,  0,  1,  1,  1,  1,  1,  0, -1,  1,  1,  1,  1,  1, -1,  1,
        1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1, -1,  1,  1,  1,  1,
        1,  1, -1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  0,
        0, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  0,  0,  0,  0,  0,  0, -1,
        0,  0,  0,  0,  0,  0, -1, -1,  1,  1, -1,  0, -1,  1,  1,  1,  1,
        1,  1,  0,  1,  1,  1,  1, -1, -1, -1,  1,  1,  1,  0,  1,  1,  1,
        1,  1,  1,  1, -1,  1,  1,  0, -1, -1,  1,  1,  1,  0,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,
        1,  1,  1,  1,  1

In [26]:
group_notdys = np.array(1*(metadata['groupDDP'].isin(['1FRdys', '3Ctrl'])))

group_dys = np.array(1*(metadata['groupDDP'] == '2FRndys'))

In [27]:
np.sum(group_notdys) + np.sum(group_dys)

252

In [28]:
label_dys = group_notdys + 2*group_dys 
label_dys[label_dys == 3] = 2
label_dys = label_dys -1

In [29]:
label_dys

array([-1, -1,  0, -1,  0,  0,  0,  0,  0, -1, -1, -1, -1,  0,  0,  0,  0,
        0,  0,  0, -1,  0,  0,  0,  0, -1,  0,  0,  0, -1, -1,  0,  0,  0,
        0,  0, -1, -1,  1, -1,  1, -1, -1, -1,  1,  1,  0, -1, -1, -1,  0,
        0, -1,  1,  1,  0,  0,  1,  1, -1,  0,  0,  1,  1, -1, -1,  1, -1,
        0,  1, -1,  1,  1,  0,  1,  0, -1, -1,  0,  1,  1,  0,  1, -1,  1,
        1,  1,  0,  1,  0,  1,  0,  1, -1,  1,  0,  1, -1,  0,  1,  0,  0,
        1,  0, -1,  0,  0, -1,  0,  1,  1,  1,  1,  1,  1,  0,  0, -1, -1,
        0, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  0,  0,  0,  0,
        0,  0,  0, -1,  0,  0,  0, -1, -1, -1,  0,  0,  0,  0,  0,  0, -1,
       -1,  0,  0,  0,  0,  0, -1, -1,  1,  0, -1,  0, -1,  0,  0,  0,  0,
        0,  0, -1,  1,  1,  1,  1, -1, -1, -1,  1,  0,  1, -1,  1,  0,  0,
        1,  1,  0,  1, -1,  1,  0, -1, -1, -1,  1,  0,  1,  0,  1,  0,  1,
        1,  1,  1,  0,  0,  1,  0,  1,  1,  1,  0,  1, -1,  1,  1,  1,  0,
        1,  1,  0,  1,  1

## create Dataframe with labels to be used

In [30]:
labels_final = pd.DataFrame(data=metadata['id_child'].values, columns=['id_child'])
labels_final['label_dys'] = label_dys
labels_final['label_risk'] = label_risk
labels_final.head()

Unnamed: 0,id_child,label_dys,label_risk
0,1,-1,-1
1,2,-1,0
2,3,0,0
3,4,-1,0
4,5,0,0


In [31]:
print("Data for 'at risk':", labels_risktype.count('atRisk'))
print("Data for 'notAtRisk_rest':", labels_risktype.count('notAtRisk_rest'))
print("Data for 'notAtRisk_highestScores':", labels_risktype.count('notAtRisk_highestScores'))

Data for 'at risk': 93
Data for 'notAtRisk_rest': 47
Data for 'notAtRisk_highestScores': 48


In [32]:
labels_risktype = [x[2] for x in labels if x[1] in [1,0]]
print("Data for 'at risk':", labels_risktype.count('atRisk'))
print("Data for 'notAtRisk_rest':", labels_risktype.count('notAtRisk_rest'))
print("Data for 'notAtRisk_highestScores':", labels_risktype.count('notAtRisk_highestScores'))

Data for 'at risk': 80
Data for 'notAtRisk_rest': 38
Data for 'notAtRisk_highestScores': 44


In [33]:
metadata.loc[(metadata['groupDDP'].isin(['1FRdys', '2FRndys', '3Ctrl']) 
              & metadata['assignment4'].isin(['at risk', 'notAtRisk_rest', 'notAtRisk_highestScores', ]))]

Unnamed: 0,id_child,groupDDP,atRiskOrNotDDP,dyslexicAtMidGroup3DDP,assignment1,assignment2,assignment3,assignment4,childInfoPresent,relativeInfoPresent,mmr_2mth,mmr_5mth,mmr_11mth,mmr_17mth,mmr_23mth,mmr_29mth,mmr_35mth,mmr_41mth,mmr_47mth
2,003,3Ctrl,notAtRisk,0,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,1,1,1,0,1,0,1,0,1,1,0
4,005,3Ctrl,notAtRisk,0,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,1,1,1,0,1,1,0,1,1,1,0
5,006,3Ctrl,notAtRisk,0,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,1,1,1,0,0,1,1,1,0,1,0
8,009,3Ctrl,notAtRisk,1,notEnoughInfo,notEnoughInfo,notEnoughInfo,notAtRisk_rest,1,1,1,0,1,1,1,1,1,1,0
15,016,3Ctrl,notAtRisk,0,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,1,1,1,1,1,0,0,0,0,0,0
16,017,3Ctrl,notAtRisk,0,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,1,1,1,0,1,1,1,0,0,1,0
17,018,3Ctrl,notAtRisk,0,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,1,1,1,0,1,1,1,1,1,1,0
19,021,3Ctrl,notAtRisk,0,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,1,1,1,0,1,1,1,1,1,1,0
21,023,3Ctrl,notAtRisk,0,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,notAtRisk_highestScores,1,1,1,0,1,0,1,1,1,1,0
22,024,3Ctrl,notAtRisk,0,notAtRisk_rest,notAtRisk_rest,notAtRisk_rest,notAtRisk_rest,1,1,1,0,1,0,1,1,0,1,0


## Custom cnt-file import function:

In [34]:
def read_cnt_file(file,
                  label_group,
                  event_idx = [3, 13, 66],
                  channel_set = "30",
                  tmin = -0.2,
                  tmax = 0.8,
                  lpass = 0.5, 
                  hpass = 40, 
                  threshold = 5, 
                  max_bad_fraction = 0.2,
                  max_bad_channels = 2):
    """ Function to read cnt file. Run bandpass filter. 
    Then detect and correct/remove bad channels and bad epochs.
    Store resulting epochs as arrays.
    
    Args:
    --------
    file: str
        Name of file to import.
    label_group: int
        Unique ID of specific group (must be >0).
    channel_set: str
        Select among pre-defined channel sets. Here: "30" or "62"
    """
    
    if channel_set == "30":
        channel_set = ['O2', 'O1', 'OZ', 'PZ', 'P4', 'CP4', 'P8', 'C4', 'TP8', 'T8', 'P7', 
                       'P3', 'CP3', 'CPZ', 'CZ', 'FC4', 'FT8', 'TP7', 'C3', 'FCZ', 'FZ', 
                       'F4', 'F8', 'T7', 'FT7', 'FC3', 'F3', 'FP2', 'F7', 'FP1']
    elif channel_set == "62":
        channel_set = ['O2', 'O1', 'OZ', 'PZ', 'P4', 'CP4', 'P8', 'C4', 'TP8', 'T8', 'P7', 
                       'P3', 'CP3', 'CPZ', 'CZ', 'FC4', 'FT8', 'TP7', 'C3', 'FCZ', 'FZ', 
                       'F4', 'F8', 'T7', 'FT7', 'FC3', 'F3', 'FP2', 'F7', 'FP1', 'AFZ', 'PO3', 
                       'P1', 'POZ', 'P2', 'PO4', 'CP2', 'P6', 'M1', 'CP6', 'C6', 'PO8', 'PO7', 
                       'P5', 'CP5', 'CP1', 'C1', 'C2', 'FC2', 'FC6', 'C5', 'FC1', 'F2', 'F6', 
                       'FC5', 'F1', 'AF4', 'AF8', 'F5', 'AF7', 'AF3', 'FPZ']
    else:
        print("Predefined channel set given by 'channel_set' not known...")
        
    
    # Initialize array
    signal_collection = np.zeros((0,len(channel_set),501))
    label_collection = [] #np.zeros((0))
    
    # Import file
    try:
        data_raw = mne.io.read_raw_cnt(file, eog='auto', preload=True)
    except ValueError:
        print("ValueError")
        print("Could not load file:", file)
        return None, None
    
    # Band-pass filter (between 0.5 and 40 Hz. was 0.5 to 30Hz in Stober 2016)
    data_raw.filter(0.5, 40, fir_design='firwin')

    events_from_annot, event_dict = mne.events_from_annotations(data_raw)
    
    # TODO: check here already if event_idx's in event_dict
    
    # Set baseline:
    baseline = (None, 0)  # means from the first instant to t = 0

    # Select channels to exclude (if any)
    channels_exclude = [x for x in data_raw.ch_names if x not in channel_set]
    channels_exclude = [x for x in channels_exclude if x not in ['HEOG', 'VEOG']]#, 'STI 014']]
    
    for event_id in event_idx:
        if str(event_id) in event_dict:
            # Pick EEG channels 
            picks = mne.pick_types(data_raw.info, meg=False, eeg=True, stim=False, eog=False,
                               #exclude=data_exclude)#'bads'])
                                   include=channel_set, exclude=channels_exclude)#'bads'])

            epochs = mne.Epochs(data_raw, events=events_from_annot, event_id=event_dict, 
                                tmin=tmin, tmax=tmax, proj=True, picks=picks,
                                baseline=baseline, preload=True, event_repeated='merge', verbose=False)

            # Detect potential bad channels and epochs
            bad_channels, bad_epochs = helper_functions.select_bad_epochs(epochs, 
                                                                          event_id, 
                                                                          threshold = threshold, 
                                                                          max_bad_fraction = max_bad_fraction)

            # Interpolate bad channels
            # ------------------------------------------------------------------
            if len(bad_channels) > 0: 
                if len(bad_channels) > max_bad_channels: 
                    print(20*'--')
                    print("Found too many bad channels (" + str(len(bad_channels)) + ")")
                    return None, None
                else:
                    # Mark bad channels:
                    data_raw.info['bads'] = bad_channels
                    # Pick EEG channels:
                    picks = mne.pick_types(data_raw.info, meg=False, eeg=True, stim=False, eog=False,
                                       #exclude=data_exclude)#'bads'])
                                       include=channel_set, exclude=channels_exclude)#'bads'])
                    epochs = mne.Epochs(data_raw, events=events_from_annot, event_id=event_dict, 
                                        tmin=tmin, tmax=tmax, proj=True, picks=picks,
                                        baseline=baseline, preload=True, verbose=False)
                    # Interpolate bad channels using functionality of 'mne'
                    epochs.interpolate_bads()

            # Get signals as array and add to total collection
            signals_cleaned = epochs[str(event_id)].drop(bad_epochs).get_data()
            signal_collection = np.concatenate((signal_collection, signals_cleaned), axis=0)
            #label_collection = np.concatenate((label_collection, event_id*label_group*np.ones((signals_cleaned.shape[0]))), axis=0)
            label_collection += [str(event_id) + label_group] * signals_cleaned.shape[0]

    return signal_collection, label_collection#.astype(int)

In [28]:
found_ids = [x[:3] for x in cnt_files]
idx = np.where(np.array(found_ids) == '036')[0]
[cnt_files[x] for x in idx]

['036_17_mc_mmn25_wk.cnt', '036_17_mc_mmn36.cnt']

In [39]:
filename = os.path.join(PATH_DATA_01, '035_17_jc_mmn25.cnt')
data_raw = mne.io.read_raw_cnt(filename, eog=['HEOG', 'VEOG'], preload=True) #, montage=None, eog='auto', preload=True)
#data_raw.filter(0.5, 40, fir_design='firwin')

Reading 0 ... 371699  =      0.000 ...   743.398 secs...


In [139]:
data_raw.info #.get_data().shape

<Info | 17 non-empty fields
    bads : list | 0 items
    ch_names : list | O2, O1, OZ, PZ, P4, CP4, P8, C4, TP8, ...
    chs : list | 64 items (EEG: 62, EOG: 2)
    comps : list | 0 items
    custom_ref_applied : bool | False
    dev_head_t : Transform | 3 items
    events : list | 0 items
    highpass : float | 0.0 Hz
    hpi_meas : list | 0 items
    hpi_results : list | 0 items
    lowpass : float | 250.0 Hz
    meas_date : NoneType | unspecified
    nchan : int | 64
    proc_history : list | 0 items
    projs : list | 0 items
    sfreq : float | 500.0 Hz
    subject_info : dict | 5 items
    acq_pars : NoneType
    acq_stim : NoneType
    ctf_head_t : NoneType
    description : NoneType
    dev_ctf_t : NoneType
    device_info : NoneType
    dig : NoneType
    experimenter : NoneType
    file_id : NoneType
    gantry_angle : NoneType
    helium_info : NoneType
    hpi_subsystem : NoneType
    kit_system_id : NoneType
    line_freq : NoneType
    meas_id : NoneType
    proj_id : No

In [140]:
events_from_annot, event_dict = mne.events_from_annotations(data_raw)
print(event_dict)
print(events_from_annot[:5])

Used Annotations descriptions: ['0', '14', '15', '4', '5', '77', '88']
{'0': 1, '14': 2, '15': 3, '4': 4, '5': 5, '77': 6, '88': 7}
[[6663    0    4]
 [7388    0    4]
 [8115    0    4]
 [8840    0    4]
 [9566    0    2]]


In [126]:
# channel names for 62 EEG channel case: 
print(epochs.ch_names)

['O2', 'O1', 'OZ', 'PZ', 'P4', 'CP4', 'P8', 'C4', 'TP8', 'T8', 'P7', 'P3', 'CP3', 'CPZ', 'CZ', 'FC4', 'FT8', 'TP7', 'C3', 'FCZ', 'FZ', 'F4', 'F8', 'T7', 'FT7', 'FC3', 'F3', 'FP2', 'F7', 'FP1', 'AFZ', 'PO3', 'P1', 'POZ', 'P2', 'PO4', 'CP2', 'P6', 'M1', 'CP6', 'C6', 'PO8', 'PO7', 'P5', 'CP5', 'CP1', 'C1', 'C2', 'FC2', 'FC6', 'C5', 'FC1', 'F2', 'F6', 'FC5', 'F1', 'AF4', 'AF8', 'F5', 'AF7', 'AF3', 'FPZ']


## Check how many EEG channels the cnt-files feature... 

In [142]:
format_collection = []
for i, filename in enumerate(cnt_files):
    # Import file 
    file = os.path.join(PATH_CNTS, filename)
    try:
        data_raw = mne.io.read_raw_cnt(file, eog='auto', preload=True)
        format_collection.append((i, len(data_raw.ch_names)))
        print(i, len(data_raw.ch_names))
    except ValueError:
        print("ValueError for file:", filename)
        format_collection.append((i, 0))

Reading 0 ... 1500919  =      0.000 ...  3001.838 secs...
0 64
Reading 0 ... 1120199  =      0.000 ...  2240.398 secs...
1 64
Reading 0 ... 1481999  =      0.000 ...  2963.998 secs...
2 64
ValueError for file: 007_17_jc_mmn.cnt
Reading 0 ... 618439  =      0.000 ...  1236.878 secs...
4 64
Reading 0 ... 449859  =      0.000 ...   899.718 secs...
5 64
Reading 0 ... 1868319  =      0.000 ...  3736.638 secs...
6 64
Reading 0 ... 781299  =      0.000 ...  1562.598 secs...
7 64
Reading 0 ... 1872419  =      0.000 ...  3744.838 secs...
8 64
Reading 0 ... 369239  =      0.000 ...   738.478 secs...
9 64
Reading 0 ... 370279  =      0.000 ...   740.558 secs...
10 64
Reading 0 ... 369359  =      0.000 ...   738.718 secs...
11 64
Reading 0 ... 368939  =      0.000 ...   737.878 secs...
12 64
Reading 0 ... 375739  =      0.000 ...   751.478 secs...
13 64
Reading 0 ... 377639  =      0.000 ...   755.278 secs...
14 64
Reading 0 ... 256699  =      0.000 ...   513.398 secs...
15 64
Reading 0 ... 379099

KeyboardInterrupt: 

In [144]:
a,b = zip(*format_collection)
len(np.where((np.array(b) == 64))[0]), len(np.where((np.array(b) == 32))[0]), len(a)

(86, 0, 86)

So far we 'only' have about 60 cnt-files of which we have a label ("risk group" vs "no risc group").
And only 42 of them feature 62 EEG channels. I hence switched to 30 EEG channels and picked the ones that are present in all patient datasets.

# Workflow data processing
1. Load cnt files.
2. Select same number of channels (here: 30 same channels which exist for both 30 and 62 channel data)
3. Preprocess raw data (bandpass + detect outliers and 'bad' epochs).
4. Store epoch data and event type as array

## LABELS:
+ After Karin's search we have proper labels for much more files!  


In [38]:
def standardize_EEG(data_array,
                    std_aim = 1,
                   centering = 'per_channel',
                   scaling = 'global'):
    """ Center data around 0 and adjust standard deviation.
    
    Args:
    --------
    centering: str
        Specify if centering should be done "per_channel", or "global".
    scaling: str
        Specify if scaling should be done "per_channel", or "global".
    """
    if centering == 'global':
        data_mean = data_array.mean()

        # Center around 0
        data_array = data_array - data_mean

    elif centering == 'per_channel':       
        for i in range(data_array.shape[1]):
            
            data_mean = data_array[:,i,:].mean()

            # Center around 0
            data_array[:,i,:] = data_array[:,i,:] - data_mean

    else:
        print("Centering method not known.")
        return None
        
    if scaling == 'global':
        data_std = data_array.std()
        
        # Adjust std to std_aim
        data_array = data_array * std_aim/data_std
    
    elif scaling == 'per_channel':
        for i in range(data_array.shape[1]):
            
            data_std = data_array[:,i,:].std()

            # Adjust std to std_aim
            data_array[:,i,:] = data_array[:,i,:] * std_aim/data_std
    else:
        print("Given method is not known.")
        return None
    
    
    return data_array

In [39]:
# Initialize array
signal_collection = np.zeros((0,30,501)) #62
label_collection = []
ID_collection = []
metadata_collection = []

collect_in_one_array = False

for i, filename in enumerate(cnt_files):
    
    # First check if we have proper label for that file
    # -----------------------------------------------------------
    
    ID = filename[:3]
    label = labels_final[labels_final['id_child'] == ID]['label_dys'].values[0]
    label_risk = labels_final[labels_final['id_child'] == ID]['label_risk'].values[0]
    #label = metadata[metadata['id_child'] == ID]['groupDDP'].values[0]
    #label_risk = metadata[metadata['id_child'] == ID]['assignment4'].values[0]
    
    if (label < 0) or (label_risk < 0):
        print("No proper label found for file: ", filename)
    else:
        #label_group = int(metadata[metadata["file"].str.match(filename[:-4])]['group'])
        label_group = 'dys' + str(label) + '_risk' + str(label_risk)
        
        print(40*"=")
        print("Importing file: ",filename)
        print("Data belongs into group: ", label_group)

        # Import data and events
        file = os.path.join(PATH_CNTS, filename)

        signal_collect, label_collect = read_cnt_file(file, 
                                                      label_group,
                                                      event_idx = [3, 13, 66],
                                                      channel_set = "30",
                                                      tmin = -0.2,
                                                      tmax = 0.8,
                                                      lpass = 0.5, 
                                                      hpass = 40, 
                                                      threshold = 5, 
                                                      max_bad_fraction = 0.2)
        
        
        # Standardize data
        # --------------------------------------------------------
        if signal_collect is not None:
            signal_collect = standardize_EEG(signal_collect,
                                 std_aim = 1,                   
                                 centering = 'per_channel',
                                 scaling = 'global')
        
        # Save data and labels
        # ---------------------------------------------------------
        if signal_collect is not None:
            
            if collect_in_one_array:

                # Get signals as array and add to total collection
                print(signal_collect.shape, len(label_collect))
                signal_collection = np.concatenate((signal_collection, signal_collect), axis=0)
                label_collection += label_collect

            else:
                if len(label_collect) > 1:
                #if label_collect is not None:
                    file = os.path.join(PATH_OUTPUT, "processed_data_" + filename[:-4] + ".npy")
                    np.save(file, signal_collect)

                    #filename = os.path.join(PATH_OUTPUT, "EEG_data_30ch_1s_corrected_metadata_ID"+ ID + ".csv")
                    file = os.path.join(PATH_OUTPUT, "processed_data_" + filename[:-4] + ".csv")

                    with open(file, 'w', newline='') as csvFile:
                        writer = csv.writer(csvFile)
                        writer.writerow(label_collect)
                    csvFile.close()
            
            ID_collection += [ID] * len(label_collect) 
            metadata_collection.append((i, filename, signal_collection.shape[0]))

No proper label found for file:  001_41_jc_mmn.cnt
No proper label found for file:  001_41_jc_mmn36_slp.cnt
Importing file:  003_41_jc_mmn36_wk.cnt
Data belongs into group:  dys0_risk0
Reading 0 ... 750799  =      0.000 ...  1501.598 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '66']
Found 37 bad epochs in a total of 29  channels.
Marked 37 bad epochs in a total of 800  epochs.
Found 6 bad epochs in a total of 6  channels.
M


FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '99']
Found 18 bad epochs in a total of 6  channels.
Marked 18 bad epochs in a total of 600  epochs.
Found 5 bad epochs in a total of 4  channels.
Marked 5 bad epochs in a total of 150  epochs.
Importing file:  015_41_jc_mmn39_380_wk.cnt
Data belongs into group:  dys0_risk0
Reading 0 ... 472039  =      0.000 ...   944.078 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a on

- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '99']
Found 3 bad epochs in a total of 4  channels.
Marked 3 bad epochs in a total of 177  epochs.
Found 1 bad epochs in a total of 1  channels.
Marked 1 bad epochs in a total of 42  epochs.
Importing file:  023_41_mc_mmn39_380_slp.cnt
Data belongs into group:  dys0_risk0
Reading 0 ... 483659  =      0.000 ...   967.318 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations d

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '99']
Found 126 bad epochs in a total of 24  channels.
Marked 126 bad epochs in a total of 804  epochs.
Found 33 bad epochs in a total of 16  channels.
Marked 33 bad epochs in a total of 202  epochs.
Importing file:  035_41_jc_mmn39_1500.cnt
Data belongs into group:  dys0_risk0
Reading 0 ... 300939  =      0.000 ...   601.878 secs...
Filtering raw data in 1 contiguous segment
Setting up b

- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['12', '2', '55']
Importing file:  104_41_jr_mmn36_2_wk.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 376859  =      0.000 ...   753.718 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.0

- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '66']
Found 375 bad epochs in a total of 30  channels.
Found bad channel (more than 160.0  bad epochs): Channel no:  0
Found bad channel (more than 160.0  bad epochs): Channel no:  1
Found bad channel (more than 160.0  bad epochs): Channel no:  2
Found bad channel (more than 160.0  bad epochs): Channel no:  3
Found bad channel (more than 160.0  bad epochs): Channel no:  4
Found bad channel (more than 160.0  bad epochs): Channel no:  5
Found bad channel (more than 160.0  bad epochs): Channel no:  6
Found bad channel (more than 160.0  bad epochs): Channel no:  7
Found bad channel (more than 160.0  bad epochs): Channel no:  8
Found bad channel (more than 160.0  bad epochs): Channel no:  9
Found bad channel (more than 160.0  bad epochs): Channel no:  10
Found bad channel (more than 160.0  bad epochs): Channel no:  11
Found bad chann


FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '66']
Found 39 bad epochs in a total of 10  channels.
Marked 39 bad epochs in a total of 400  epochs.
Found 5 bad epochs in a total of 6  channels.
Marked 5 bad epochs in a total of 50  epochs.
Found 6 bad epochs in a total of 3  channels.
Marked 6 bad epochs in a total of 50  epochs.
Importing file:  121_41_jr_mmn36_wk.cnt
Data belongs into group:  dys0_risk1
Reading 0 ... 745639  =      0.000 ...  1491.278 secs...
Filtering raw data in 1 contiguous segment
Setting up band-

- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '66']
Found 414 bad epochs in a total of 12  channels.
Found bad channel (more than 160.0  bad epochs): Channel no:  29
Marked 186 bad epochs in a total of 800  epochs.
Found 53 bad epochs in a total of 6  channels.
Found bad channel (more than 20.0  bad epochs): Channel no:  29
Marked 22 bad epochs in a total of 100  epochs.
Found 46 bad epochs in a total of 5  channels.
Found bad channel (more than 20.0  bad epochs): Channel no:  29
Marked 15 bad epochs in a total of 100  epochs.
Importing file:  129_41_jr_mmn36_wk.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 742459  =      0.000 ...  1484.918 secs...
Filtering r

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '99']
Found 111 bad epochs in a total of 19  channels.
Marked 111 bad epochs in a total of 600  epochs.
Found 23 bad epochs in a total of 15  channels.
Marked 23 bad epochs in a total of 150  epochs.
Importing file:  145_41_mr_mmn36_wk.cnt
Data belongs into group:  dys0_risk1
Reading 0 ... 746739  =      0.000 ...  1493.478 secs...
Filtering raw data in 1 contiguous segment
Setting up ban

Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '99']
Found 26 bad epochs in a total of 30  channels.
Marked 26 bad epochs in a total of 600  epochs.
Found 5 bad epochs in a total of 12  channels.
Marked 5 bad epochs in a total of 150  epochs.
Importing file:  152_41_jr_mmn39_380.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 476919  =      0.000 ...   953.838 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter par

- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '99']
Found 98 bad epochs in a total of 15  channels.
Marked 98 bad epochs in a total of 600  epochs.
Found 22 bad epochs in a total of 11  channels.
Marked 22 bad epochs in a total of 150  epochs.
Importing file:  158_41_mr_mmn39_1500.cnt
Data belongs into group:  dys0_risk1
Reading 0 ... 665019  =      0.000 ...  1330.038 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotatio

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '99']
Found 52 bad epochs in a total of 13  channels.
Marked 52 bad epochs in a total of 600  epochs.
Found 10 bad epochs in a total of 7  channels.
Marked 10 bad epochs in a total of 150  epochs.
No proper label found for file:  165_41_jr_mmn39_380.cnt
Importing file:  166-488-41m-jr-mmn39.cnt
Data belongs into group:  dys0_risk1
Reading 0 ... 924139  =      0.000 ...  1848.278 secs...
F

Found 30 bad epochs in a total of 4  channels.
Marked 30 bad epochs in a total of 600  epochs.
Found 6 bad epochs in a total of 2  channels.
Marked 6 bad epochs in a total of 150  epochs.
Importing file:  168_41_jr_mmn39_380_mixed.cnt
Data belongs into group:  dys0_risk1
Reading 0 ... 502959  =      0.000 ...  1005.918 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['0', '13', '3', '99']
Found 15 bad epochs in a total of 11  channels.
Mar

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '99']
Found 207 bad epochs in a total of 5  channels.
Found bad channel (more than 120.0  bad epochs): Channel no:  25
Marked 4 bad epochs in a total of 600  epochs.
Found 43 bad epochs in a total of 1  channels.
Found bad channel (more than 30.0  bad epochs): Channel no:  25
Marked 0 bad epochs in a total of 150  epochs.
Importing file:  174_41_jd_mmn39_1500.cnt
Data belongs into group: 

- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '99']
Found 21 bad epochs in a total of 7  channels.
Marked 21 bad epochs in a total of 600  epochs.
Found 5 bad epochs in a total of 3  channels.
Marked 5 bad epochs in a total of 150  epochs.
Importing file:  179_41_mr_mmn39_1500.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 406059  =      0.000 ...   812.118 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopba

Found 1 bad epochs in a total of 1  channels.
Marked 1 bad epochs in a total of 150  epochs.
Importing file:  301_41_jc_mmn36_wk.cnt
Data belongs into group:  dys0_risk0
Reading 0 ... 734639  =      0.000 ...  1469.278 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '66']
Found 25 bad epochs in a total of 15  channels.
Marked 25 bad epochs in a total of 800  epochs.
Found 6 bad epochs in a total of 7  channels.
Marked 6 bad epo

- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['0', '13', '3', '66']
Found 219 bad epochs in a total of 30  channels.
Found bad channel (more than 134.6  bad epochs): Channel no:  8
Marked 80 bad epochs in a total of 673  epochs.
Found 36 bad epochs in a total of 30  channels.
Found bad channel (more than 17.0  bad epochs): Channel no:  8
Marked 22 bad epochs in a total of 85  epochs.
Found 47 bad epochs in a total of 30  channels.
Found bad channel (more than 17.0  bad epochs): Channel no:  0
Found bad channel (more than 17.0  bad epochs): Channel no:  6
Found bad channel (more than 17.0  bad epochs): Channel no:  8
Marked 26 bad epochs in a total of 85  epochs.
----------------------------------------
Found too many bad channels (3)
Importing file:  312_

- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '99']
Found 49 bad epochs in a total of 30  channels.
Marked 49 bad epochs in a total of 759  epochs.
Found 6 bad epochs in a total of 5  channels.
Marked 6 bad epochs in a total of 120  epochs.
Importing file:  321_41_mc_mmn39_wk.cnt
Data belongs into group:  dys0_risk0
Reading 0 ... 937819  =      0.000 ...  1875.638 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window

Found 2 bad epochs in a total of 3  channels.
Marked 2 bad epochs in a total of 150  epochs.
Importing file:  330_41_mc_mmn39.cnt
Data belongs into group:  dys0_risk0
Reading 0 ... 931939  =      0.000 ...  1863.878 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['0', '13', '3', '99']
Found 37 bad epochs in a total of 30  channels.
Marked 37 bad epochs in a total of 1000  epochs.
Found 15 bad epochs in a total of 30  channels.
Marked 15 b

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '99']
Found 16 bad epochs in a total of 25  channels.
Marked 16 bad epochs in a total of 1000  epochs.
No outliers found with given threshold.
Importing file:  346_41_mc_mmn39_wk.cnt
Data belongs into group:  dys0_risk0
Reading 0 ... 933039  =      0.000 ...  1866.078 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---

- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['0', '13', '3', '66']
Found 24 bad epochs in a total of 28  channels.
Marked 24 bad epochs in a total of 800  epochs.
Found 3 bad epochs in a total of 2  channels.
Marked 3 bad epochs in a total of 100  epochs.
Found 4 bad epochs in a total of 22  channels.
Marked 4 bad epochs in a total of 100  epochs.
No proper label found for file:  409_41_jd_mmn25_wk.cnt
No proper label found for file:  409_41_jd_mmn36_wk.cnt
Importing file:  411_41_md_mmn25_wk.cnt
Data belongs into group:  dys0_risk1
Reading 0 ... 724139  =      0.000 ...  1448.278 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.019

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['0', '13', '3', '66']
Found 14 bad epochs in a total of 19  channels.
Marked 14 bad epochs in a total of 800  epochs.
Found 1 bad epochs in a total of 1  channels.
Marked 1 bad epochs in a total of 100  epochs.
Found 1 bad epochs in a total of 1  channels.
Marked 1 bad epochs in a total of 100  epochs.
Importing file:  425_41_jd_mmn25_wk.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 726819 

Found 4 bad epochs in a total of 2  channels.
Marked 4 bad epochs in a total of 100  epochs.
Importing file:  436_41_jd_mmn39.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 936139  =      0.000 ...  1872.278 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '99']
Found 44 bad epochs in a total of 30  channels.
Marked 44 bad epochs in a total of 1000  epochs.
Found 9 bad epochs in a total of 4  channels.
Marked 9 bad epoch

Importing file:  455_41_jd_mmn39_wk.cnt
Data belongs into group:  dys0_risk1
Reading 0 ... 938779  =      0.000 ...  1877.558 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['0', '13', '3', '99']
Found 32 bad epochs in a total of 12  channels.
Marked 32 bad epochs in a total of 1000  epochs.
Found 5 bad epochs in a total of 6  channels.
Marked 5 bad epochs in a total of 150  epochs.
Importing file:  456_41_md_mmn39_wk.cnt
Data belongs int

- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '99']
Found 41 bad epochs in a total of 12  channels.
Marked 41 bad epochs in a total of 1000  epochs.
Found 9 bad epochs in a total of 30  channels.
Marked 9 bad epochs in a total of 150  epochs.
Importing file:  472_41_jd_mmn39_wk.cnt
Data belongs into group:  dys0_risk1
Reading 0 ... 929379  =      0.000 ...  1858.758 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming wind

Found 50 bad epochs in a total of 30  channels.
Marked 50 bad epochs in a total of 1000  epochs.
Found 11 bad epochs in a total of 30  channels.
Marked 11 bad epochs in a total of 150  epochs.
Importing file:  482_41_md_mmn39_wk.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 932959  =      0.000 ...  1865.918 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '99']
Found 22 bad epochs in a total of 9  channels.
Marked 22 b

Importing file:  494_41_jd_mmn39_wk.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 936099  =      0.000 ...  1872.198 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '99']
Found 8 bad epochs in a total of 4  channels.
Marked 8 bad epochs in a total of 1000  epochs.
No outliers found with given threshold.
Importing file:  496_41_jd_mmn39_wk.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 934499  =      0.000 ...  

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['0', '13', '3', '99']
Found 16 bad epochs in a total of 30  channels.
Marked 16 bad epochs in a total of 212  epochs.
No outliers found with given threshold.
Importing file:  611-157-41m-mc-mmn39_2.cnt
Data belongs into group:  dys0_risk0
Reading 0 ... 276919  =      0.000 ...   553.838 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parame


FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '66']
Found 31 bad epochs in a total of 29  channels.
Marked 31 bad epochs in a total of 697  epochs.
Found 2 bad epochs in a total of 3  channels.
Marked 2 bad epochs in a total of 88  epochs.
Found 2 bad epochs in a total of 2  channels.
Marked 2 bad epochs in a total of 87  epochs.
Importing file:  618-163-41m-jc-mmn36.cnt
Data belongs into group:  dys0_risk0
Reading 0 ... 605659  =      0.000 ...  1211.318 secs...
Filtering raw data in 1 contiguous segment
Setting up ban

Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '99']
Found 41 bad epochs in a total of 11  channels.
Marked 41 bad epochs in a total of 1071  epochs.
Found 7 bad epochs in a total of 10  channels.
Marked 7 bad epochs in a total of 268  epochs.
Importing file:  629-357-41m-jc-mmn39-2.cnt
Data belongs into group:  dys0_risk0
Reading 0 ... 196059  =      0.000 ...   392.118 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass fi

Found 17 bad epochs in a total of 6  channels.
Marked 17 bad epochs in a total of 800  epochs.
No outliers found with given threshold.
Found 1 bad epochs in a total of 1  channels.
Marked 1 bad epochs in a total of 100  epochs.
Importing file:  702-071-41m-jr-mmn36.cnt
Data belongs into group:  dys0_risk1
Reading 0 ... 845959  =      0.000 ...  1691.918 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '66']
Found 80 bad epochs i

- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '66']
Found 37 bad epochs in a total of 30  channels.
Marked 37 bad epochs in a total of 400  epochs.
Found 3 bad epochs in a total of 30  channels.
Marked 3 bad epochs in a total of 50  epochs.
Found 5 bad epochs in a total of 4  channels.
Marked 5 bad epochs in a total of 50  epochs.
Importing file:  710-078-41m-jr-mmn36.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 781019  =      0.000 ...  1562.038 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '66']
Found 41 bad epochs in a total of 10  channels.
Marked 41 bad epochs in a total of 800  epochs.
Found 4 bad epochs in a total of 2  channels.
Marked 4 bad epochs in a total of 100  epochs.
Found 8 bad epochs in a total of 3  channels.
Marked 8 bad epochs in a total of 100  epochs.
Importing file:  717-111-41m-mr-mmn36.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 666559  = 

- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '66']
Found 268 bad epochs in a total of 18  channels.
Found bad channel (more than 160.0  bad epochs): Channel no:  0
Marked 38 bad epochs in a total of 800  epochs.
Found 31 bad epochs in a total of 3  channels.
Found bad channel (more than 20.0  bad epochs): Channel no:  0
Marked 3 bad epochs in a total of 100  epochs.
Found 36 bad epochs in a total of 6  channels.
Found bad channel (more than 20.0  bad epochs): Channel no:  0
Marked 6 bad epochs in a total of 100  epochs.
Importing file:  726-126-41m-jr-mmn36.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 741519  =      0.000 ...  1483.038 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB s

Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '99']
Found 95 bad epochs in a total of 28  channels.
Marked 95 bad epochs in a total of 800  epochs.
Found 12 bad epochs in a total of 3  channels.
Marked 12 bad epochs in a total of 100  epochs.
Importing file:  735-125-41m-mr-mmn39.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 650139  =      0.000 ...  1300.278 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter p

- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 3301 samples (6.602 sec)

Used Annotations descriptions: ['13', '3', '99']
Found 42 bad epochs in a total of 11  channels.
Marked 42 bad epochs in a total of 600  epochs.
Found 13 bad epochs in a total of 8  channels.
Marked 13 bad epochs in a total of 150  epochs.
Importing file:  747-410-41m-mr-mmn39_langzt.cnt
Data belongs into group:  dys1_risk1
Reading 0 ... 967679  =      0.000 ...  1935.358 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53

In [48]:
signal_collect is None

True

In [34]:
filename[:-4]

'005_17_jc_mmn36_slp_mmn25_slp_mmn47_mixed'

In [32]:
signal_collection.shape, len(label_collection), len(ID_collection)

((0, 30, 501), 0, 500)

In [57]:
metadata_collection[:10]

[(1, '005_17_jc_mmn36_slp_mmn25_slp_mmn47_mixed.cnt', 500),
 (2, '006_17_mc_mmn36_slp.cnt', 993),
 (4, '007_17_jc_mmn2.cnt', 993)]

In [55]:
print("Unique labels found in data:", list(set(label_collection)))

Unique labels found in data: ['13dys0_risk0', '3dys0_risk0', '66dys0_risk0']


We hence get a dataset of 39083 datapoints with known label.  
Each datapoint consits of a 1-second EEG signal of 30 channels with a 500Hz sampling rate. Thus arrays with a size of 30 x 501. 

# Save entire processed dataset:

In [75]:
filename = os.path.join(PATH_OUTPUT, "EEG_data_30channels_1s_corrected.npy")
np.save(filename, signal_collection)

filename = os.path.join(PATH_OUTPUT, "EEG_data_30channels_1s_corrected_labels.npy")
np.save(filename, label_collection)

import csv
filename = os.path.join(PATH_OUTPUT, "EEG_data_30channels_1s_corrected_metadata.csv")

with open(filename, 'w') as csvFile:
    writer = csv.writer(csvFile)
    writer.writerows(metadata_collection)
csvFile.close()