## Processing raw EEG data

#### Import Packages 

In [6]:
import os
import mne
import functools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


from autoreject import AutoReject # https://autoreject.github.io/stable/index.html
from multiprocessing import Pool


import local_paths

from functions import epodium

#### Filtering ePodium dataset and rejecting bad trials

In [None]:
# Recordings of incomplete experiments; process_file skips any file whose
# name (without extension) appears in this list.
ignore_files = [
    "113a",
    "107b (deel 1+2)",
    "132a",
    "121b(2)",
    "113b",
    "107b (deel 3+4)",
    "147a",
    "121a",
    "134a",
    "143b",
    "121b(1)",
    "145b",
    "152a",
    "184a",
    "165a",
    "151a",
    "163a",
]

def process_file(method, file):
    """Filter one raw ePodium recording, reject bad trials and save the result.

    The recording is read from ``local_paths.ePod_dataset``, restricted to
    ``epodium.channel_names``, high-pass filtered at 0.1, epoched from -0.2
    to 0.8 around the grouped events, low-pass filtered at 40 and cleaned
    with AutoReject. The cleaned epoch data is written as ``.npy`` and the
    matching events as ``.txt`` under ``local_paths.ePod_processed_autoreject``.

    Nothing is done when the file is not a ``.bdf`` file, when the method is
    not ``"autoreject"``, when both output files already exist, or when the
    file name is listed in ``ignore_files``.

    Args:
        method: Processing method; only ``"autoreject"`` is implemented.
        file: File name (with extension) inside ``local_paths.ePod_dataset``.

    Returns:
        None. Results are written to disk.
    """
    filename, extension = os.path.splitext(file)

    # Guard clauses: only .bdf recordings and the 'autoreject' method are handled.
    if extension != '.bdf' or method != "autoreject":
        return

    epochs_dir = os.path.join(local_paths.ePod_processed_autoreject, 'epochs')
    events_dir = os.path.join(local_paths.ePod_processed_autoreject, 'events')
    path_processed = os.path.join(epochs_dir, filename + ".npy")
    path_events = os.path.join(events_dir, filename + ".txt")

    # Skip recordings that were already fully processed in an earlier run.
    if os.path.exists(path_processed) and os.path.exists(path_events):
        return

    if filename in ignore_files:
        return

    path = os.path.join(local_paths.ePod_dataset, file)
    raw = mne.io.read_raw_bdf(path, preload = True, verbose = False)
    events = mne.find_events(raw, verbose = False)
    events_12 = epodium.group_events_12(events)

    # Set electrodes
    raw.pick_channels(epodium.channel_names)
    montage = mne.channels.make_standard_montage('standard_1020')
    raw.info.set_montage(montage, on_missing = 'ignore')

    # High-pass filter for detrending
    raw.filter(0.1, None, verbose = False)

    # Create epochs from raw. Epoch creation fails when the recording does not
    # contain every event in the dictionary; such files are reported and
    # skipped. `Exception` (rather than a bare except) keeps the best-effort
    # behaviour without swallowing KeyboardInterrupt/SystemExit.
    try:
        epochs = mne.Epochs(raw, events_12, epodium.event_dictionary, -0.2, 0.8, preload = True, verbose = False)
    except Exception:
        print(f"Not all events in file {file} \n", end = '')
        return

    # Low pass filter for high-frequency artifacts
    epochs.filter(None, 40, verbose = False)

    # Reject bad trials and repair bad sensors in EEG
    ar = AutoReject()
    epochs_clean = ar.fit_transform(epochs)

    # Make sure the output folders exist; np.save/np.savetxt do not create them.
    os.makedirs(epochs_dir, exist_ok = True)
    os.makedirs(events_dir, exist_ok = True)

    # Save data and events
    np.save(path_processed, epochs_clean.get_data())
    np.savetxt(path_events, epochs_clean.events, fmt='%i')

# Multiprocessing
# TODO glob :glob.glob(os.path.join(local_paths.ePod_dataset, '*.bdf')
# The pool is used as a context manager so its worker processes are released
# once the (blocking) map call has returned; previously the pool was never
# closed. process_file itself ignores entries that are not .bdf files.
with Pool(processes = 4) as pool:
    pool.map(functools.partial(process_file, "autoreject"), os.listdir(local_paths.ePod_dataset))

print("All files processed")

Running autoreject on ch_type=eeg
Running autoreject on ch_type=eeg


  0%|          | Creating augmented epochs : 0/32 [00:00<?,       ?it/s]

  0%|          | Creating augmented epochs : 0/32 [00:00<?,       ?it/s]

Running autoreject on ch_type=eeg


  0%|          | Creating augmented epochs : 0/32 [00:00<?,       ?it/s]

  0%|          | Computing thresholds ... : 0/32 [00:00<?,       ?it/s]

  0%|          | Computing thresholds ... : 0/32 [00:00<?,       ?it/s]

  0%|          | Computing thresholds ... : 0/32 [00:00<?,       ?it/s]

#### Split into separate files for each event and downsample

In [None]:
# Split every cleaned epochs file into one file per event type and resample
# each part to 512 before saving.
montage = mne.channels.make_standard_montage('standard_1020')

epochs_dir = os.path.join(local_paths.ePod_processed_autoreject, 'epochs')
events_dir = os.path.join(local_paths.ePod_processed_autoreject, 'events')
split_dir = local_paths.ePod_processed_autoreject_epochs_split_downsampled

# np.save does not create missing folders.
os.makedirs(split_dir, exist_ok = True)

# Collect each epochs file that still misses at least one per-event output.
# Every file is listed once, however many of its event files are missing.
unsaved_files = []
for npy_file in os.listdir(epochs_dir):
    # Skip stray directory entries that are not saved epoch arrays.
    if not npy_file.endswith(".npy"):
        continue
    stem = os.path.splitext(npy_file)[0]
    is_incomplete = any(
        not os.path.exists(os.path.join(split_dir, stem + "_" + event + ".npy"))
        for event in epodium.event_dictionary
    )
    if is_incomplete:
        unsaved_files.append(npy_file)

if(unsaved_files):
    print(f"Splitting files: {unsaved_files}")
else:
    print("All files complete")

for npy_file in unsaved_files:
    stem = os.path.splitext(npy_file)[0]

    # Load the data belonging to the file of this iteration (the loop variable
    # is npy_file; the previous code referenced an undefined name `file`).
    npy = np.load(os.path.join(epochs_dir, npy_file))
    # ndmin=2 keeps the events array two-dimensional even when only a single
    # epoch (one row) was saved.
    events_12 = np.loadtxt(os.path.join(events_dir, stem + ".txt"), dtype=int, ndmin=2)
    # NOTE(review): 2048 is assumed to be the sampling rate of the saved
    # epochs - confirm against the raw recordings.
    info = mne.create_info(epodium.channel_names, 2048, ch_types='eeg')

    epochs = mne.EpochsArray(npy, info, events=events_12, tmin=-0.2,
                             event_id=epodium.event_dictionary, verbose=False)
    epochs.info.set_montage(montage, on_missing = 'ignore')

    for event in epodium.event_dictionary:
        path_processed = os.path.join(split_dir, stem + "_" + event + ".npy")
        # Only write the event files that are still missing.
        if os.path.exists(path_processed):
            continue
        np.save(path_processed, epochs[event].resample(512).get_data())

        

## Step-by-step outline of the ideal pipeline

#### Processing Pipeline

+ Prepare EEG 
1. Drop unused channels
2. Subtract reference (mastoids)
3. Detrend 
4. Filter
5. Remove bad channels

+ Segment EEG into standard and deviant epochs (ERPs)
1. subtract baseline
2. Reject artefacts
3. Average to get evoked (for each marker/subject/channel separately)

+ Calculate Mismatch response 
1. deviant - standard for a single subject, for example GiepST_D - GiepST_S
2. check differences between channels and subjects

#### Analyse mismatch response 

Deviant minus standard ERP
+ Check between subjects to see if the subjects have similar responses
+ Check between channels to observe which parts of the brain are more influenced by the events


#### Extract features (Optional)
+ peak latency
+ peak amplitude
+ mean amplitude


#### Create labels


#### Input data into DL models

#### Visualise results