## Processing raw EEG data

#### Import Packages 

In [None]:
import mne
import numpy as np
import pandas as pd
import autoreject

import os
import glob
import functools
import ipywidgets
# from multiprocessing import Pool

import local_paths
from functions import epodium

#### Filtering ePodium dataset and rejecting bad trials

The EEG data is processed with the following techniques:
+ A high-pass filter on the raw EEG sequence with cutoff frequency 0.1 Hz to remove slow trends
+ Splitting the raw data into 1 second epochs in which the event occurs at 0.2s.
+ The epochs are cleaned with the autoreject library. This library contains classes that automatically reject bad trials and repair bad sensors in EEG data. The AutoReject and Ransac classes are used. https://autoreject.github.io/stable/index.html


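+ A low-pass filter on the epochs with cutoff frequency 40 Hz to remove high-frequency artifacts (in the code this filter is applied before the epochs are cleaned)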

### Choose processing methods

In [None]:
# Radio buttons for choosing which autoreject class cleans the epochs.
# 'autoreject' is pre-selected; the chosen value is read by later cells.
processing_method_widget = ipywidgets.RadioButtons(
    options=['autoreject', 'ransac'],
    value='autoreject',
    description='processing:',
)
display(processing_method_widget)

### Processing pipeline 
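The *process_raw* function processes a raw file with the chosen method. It saves the cleaned epochs as a .npy file in the *epochs* subfolder and the corresponding events as a .txt file in the *events* subfolder of the output folder.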

In [None]:
# These experiments are incomplete
ignore_files = ["113a", "107b (deel 1+2)", "132a", "121b(2)", "113b", "107b (deel 3+4)", "147a",
                "121a", "134a", "143b", "121b(1)", "145b", "152a", "184a", "165a", "151a", "163a",
                "207a", "215b", "201b"]


def process_raw(path_file, path_processed, method, verbose = False):
    """
        Process one raw .bdf recording and save the cleaned epochs and their events.

        The recording is high-pass filtered at 0.1 Hz, cut into epochs from
        -0.2 s to 0.8 s around each event, low-pass filtered at 40 Hz and then
        cleaned with the chosen autoreject class.

        Output (nothing is returned):
          + <path_processed>/epochs/<filename>.npy : data of the cleaned epochs
          + <path_processed>/events/<filename>.txt : events of the cleaned epochs

        Args:
            path_file: path to the raw .bdf file.
            path_processed: folder that receives the 'epochs' and 'events' subfolders.
            method: cleaning method, either 'autoreject' or 'ransac'.
            verbose: if True, print which files are processed, skipped or ignored.

        Raises:
            ValueError: if 'method' is not one of the known processing methods.

        A file is skipped when both of its output files already exist, when its
        name is listed in 'ignore_files', or when the epochs cannot be created.
    """
    file = os.path.basename(path_file)
    filename, _ = os.path.splitext(file)

    path_epochs_folder = os.path.join(path_processed, 'epochs')
    path_events_folder = os.path.join(path_processed, 'events')
    path_processed_file = os.path.join(path_epochs_folder, filename + ".npy")
    path_processed_events = os.path.join(path_events_folder, filename + ".txt")

    if os.path.exists(path_processed_file) and os.path.exists(path_processed_events):
        if verbose:
            print(f"File {file} already processed \n", end = '')
        return

    if filename in ignore_files:
        if verbose:
            print(f"File {file} ignored \n", end = '')
        return

    # Fail before the expensive loading and filtering instead of after it.
    if method not in ("autoreject", "ransac"):
        raise ValueError(f"Unknown processing method {method!r}, expected 'autoreject' or 'ransac'")

    # np.save / np.savetxt do not create missing folders themselves.
    os.makedirs(path_epochs_folder, exist_ok = True)
    os.makedirs(path_events_folder, exist_ok = True)

    if verbose:
        print(f"Processing file: {file}  \n" , end = '')
    raw = mne.io.read_raw_bdf(path_file, preload = True, verbose = False)
    events = mne.find_events(raw, verbose = False, min_duration = 2/epodium.frequency)
    events_12 = epodium.group_events_12(events)

    # Set electrodes
    raw.pick_channels(epodium.channel_names)
    montage = mne.channels.make_standard_montage('standard_1020')
    raw.info.set_montage(montage, on_missing = 'ignore')

    # High-pass filter for detrending
    raw.filter(0.1, None, verbose = False)

    # Create epochs from raw. Creation fails when the recording does not
    # contain the expected events; such files are skipped (best effort).
    # 'Exception' keeps KeyboardInterrupt / SystemExit working during long runs.
    try:
        epochs = mne.Epochs(raw, events_12, epodium.event_dictionary, -0.2, 0.8, preload = True, verbose = False)
    except Exception:
        print(f"Not all events in file {file} \n", end = '')
        return

    # Low pass filter for high-frequency artifacts
    epochs.filter(None, 40, verbose = False)

    # Reject bad trials and repair bad sensors in EEG
    if method == "autoreject":
        cleaner = autoreject.AutoReject()
    else:
        cleaner = autoreject.Ransac()
    epochs_clean = cleaner.fit_transform(epochs)

    # Save data and events
    np.save(path_processed_file, epochs_clean.get_data())
    np.savetxt(path_processed_events, epochs_clean.events, fmt='%i')

# Pick the output folder that belongs to the method selected in the widget.
chosen_method = processing_method_widget.value
if chosen_method == "autoreject":
    path_processed = local_paths.ePod_processed_autoreject
elif chosen_method == "ransac":
    path_processed = local_paths.ePod_processed_ransac

## Multiprocessing (disabled sketch, kept for reference)
# pool = Pool(processes = 8)
# pool.map(functools.partial(process_raw, path_processed = path_processed, method = chosen_method),
#          sorted(glob.glob(os.path.join(local_paths.ePod_dataset, '*.bdf'))))

# Process every raw .bdf recording of the dataset, one after the other, in sorted order.
raw_paths = sorted(glob.glob(os.path.join(local_paths.ePod_dataset, '*.bdf')))
for path_raw in raw_paths:
    process_raw(path_raw, path_processed, method = chosen_method, verbose = False)

print("All files processed")

#### Split into separate files for each event

+ The following function splits the processed epochs up into a separate file for each event.
+ The sampling rate is also reduced to decrease the data size.

In [None]:
def split_clean_epochs(path_processed, sample_rate = 512):
    """
        Split the processed epochs of every recording into a separate file for each event.
        The epochs are resampled to 'sample_rate' to decrease the data size.

        From path_processed, the function uses the epochs from the 'epochs' folder
        (with the matching events from the 'events' folder) and saves the result
        in 'epochs_split' as <filename>_<event>.npy.
        Split files that already exist are left untouched.

        Args:
            path_processed: folder that contains the 'epochs' and 'events' subfolders.
            sample_rate: sampling rate in Hz of the saved split epochs.
    """

    montage = mne.channels.make_standard_montage('standard_1020')
    # NOTE(review): 2048 is assumed to be the sampling rate of the saved epochs - confirm.
    info = mne.create_info(epodium.channel_names, 2048, ch_types='eeg')

    # np.save does not create a missing folder itself.
    path_split_folder = os.path.join(path_processed, 'epochs_split')
    os.makedirs(path_split_folder, exist_ok = True)

    for npy_filepath in glob.glob(os.path.join(path_processed, 'epochs', '*.npy')):
        filename = os.path.splitext(os.path.basename(npy_filepath))[0]

        # Find missing files, remembering which event each one belongs to
        missing_splits = []
        for event in epodium.event_dictionary:
            path_split = os.path.join(path_split_folder, filename + "_" + event + ".npy")
            if not os.path.exists(path_split):
                missing_splits.append((event, path_split))

        if not missing_splits:
            continue

        # Split and save missing files.
        # glob already returns the complete path of the epochs file.
        npy = np.load(npy_filepath)
        events_12 = np.loadtxt(os.path.join(path_processed, 'events', filename + ".txt"), dtype=int)
        epochs = mne.EpochsArray(npy, info, events=events_12, tmin=-0.2,
                                 event_id=epodium.event_dictionary, verbose=False)
        epochs.info.set_montage(montage, on_missing = 'ignore')

        for event, path_split in missing_splits:
            # Select the epochs of the event that belongs to this split file.
            np.save(path_split, epochs[event].resample(sample_rate).get_data())
            print(f"{os.path.basename(path_split)} saved")

# Split the epochs in the chosen output folder, using the default sampling rate.
split_clean_epochs(path_processed = path_processed)

<br>


## Steps of ideal processing pipeline:

#### Processing Pipeline

+ Prepare EEG 
1. Drop unused channels
2. Subtract reference (mastoids)
3. Detrend 
4. Filter
5. Remove bad channels

+ Segment EEG into standard and deviant epochs
1. subtract baseline
2. Reject artefacts
3. Average to get the evoked for each subject, marker, and channel

+ Calculate Mismatch response 
1. deviant - standard for a single subject, for example GiepST_D - GiepST_S
2. check differences between channels and subjects

#### Analyse mismatch response 

Deviant minus standard ERP
+ Check between subjects to see if the subjects have similar responses
+ Check between channels to observe which parts of the brain are more influenced by the events


#### Extract features (Optional)
+ peak latency
+ peak amplitude
+ mean amplitude


#### Create labels


#### Input data into DL models

#### Visualise results of model predictions