## Processing raw EEG data

#### Import Packages 

In [1]:
import mne
import numpy as np
import pandas as pd
import autoreject

import os
import glob
import functools
import ipywidgets
# from multiprocessing import Pool

import local_paths
from functions import epodium

#### Filtering ePodium dataset and rejecting bad trials

The EEG data located in _local_paths.ePod_dataset_ is cleaned with the following techniques:
+ A high-pass filter on the raw EEG sequence with cutoff frequency 0.1 Hz to remove slow trends
+ Splitting the raw data into 1 second epochs in which the event occurs at 0.2s.
+ The epochs are cleaned with the autoreject library. This library contains classes that automatically reject bad trials and repair bad sensors in EEG data. The AutoReject and Ransac classes are used. https://autoreject.github.io/stable/index.html


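+ A low-pass filter on the epochs with cutoff frequency 40 Hz to remove high-frequency artifacts (in the code below this filter is applied before the autoreject step)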

<br>

### Cleaning raw files 
The *process_raw* function cleans a raw file and saves the resulting epochs as an `_epo.fif` file, together with a `.txt` file containing the events, into the processed-data folder.

In [2]:
# Recordings whose experiment is incomplete.
# NOTE(review): process_raw checks dataset_info.incomplete_experiments, not this
# list - confirm whether this list is still needed.
ignore_files = [
    "113a", "107b (deel 1+2)", "132a", "121b(2)", "113b",
    "107b (deel 3+4)", "147a", "121a", "134a", "143b",
    "121b(1)", "145b", "152a", "184a", "165a",
    "151a", "163a", "207a", "215b", "201b",
]

def process_raw(raw_path, dataset_info, processed_directory, verbose=False):
    """
        Process one raw EEG recording and store the cleaned epochs on disk.

        Processing steps: 1. high-pass filter (0.1 Hz), 2. create epochs (-0.2 s to 0.8 s
        around each event), 3. low-pass filter (40 Hz), 4. AutoReject.
        The cleaned epochs are saved as "<name>_epo.fif" in processed_directory and the
        events as "<name>_events.txt" in its "events" sub-folder.

        The function returns early, without processing, when:
        - the epochs file already exists (a missing events file is regenerated from it),
        - the recording is listed in dataset_info.incomplete_experiments,
        - the file extension is neither .bdf nor .cnt,
        - epoch creation fails.

        Args:
        raw_path: Path to the raw EEG-file (.bdf or .cnt).
        dataset_info: Object describing the dataset. This function uses its
            incomplete_experiments, get_events_from_raw(raw), channel_names and
            mne_montage members.
        processed_directory: Directory for storing the files.
        verbose: If True, print a progress line for the file.

        Returns:
        None
    """

    # Raw file-names
    file = os.path.basename(raw_path)
    filename, extension = os.path.splitext(file)

    # Paths for cleaned data
    path_epoch = os.path.join(processed_directory, filename+"_epo.fif")
    events_directory = os.path.join(processed_directory, "events")
    path_events = os.path.join(events_directory, filename+"_events.txt")

    # If file already processed. Only the epochs file counts as proof: a stray events
    # file without epochs must not cause the recording to be skipped forever.
    if os.path.exists(path_epoch):
        if verbose:
            print(f"File {file} already cleaned \n", end='')
        # If the event .txt file is missing, rebuild it from the stored epochs:
        if not os.path.exists(path_events):
            print(f"Creating the event file {filename}.txt \n", end='')
            os.makedirs(events_directory, exist_ok=True)
            epochs_clean = mne.read_epochs(path_epoch, verbose=0)
            np.savetxt(path_events, epochs_clean.events, fmt='%i')
        return

    if filename in dataset_info.incomplete_experiments:
        if verbose:
            print(f"File {file} ignored \n", end='')
        return

    if verbose:
        print(f"Cleaning file: {file}  \n" , end='')

    # Read-in raw file
    if extension == ".bdf":
        raw = mne.io.read_raw_bdf(raw_path, preload=True, verbose=False)
    elif extension == ".cnt":
        raw = mne.io.read_raw_cnt(raw_path, preload=True, verbose=False)
    else:
        print(f"The file {raw_path} has doesn't exist or has an incompatible extension.")
        # Nothing was loaded, so stop here instead of failing on an undefined `raw`.
        return

    events = dataset_info.get_events_from_raw(raw)

    # Set electrodes
    raw.pick_channels(dataset_info.channel_names)
    raw.info.set_montage(dataset_info.mne_montage, on_missing='ignore')

    # High-pass filter for detrending
    raw.filter(0.1, None, verbose=False)

    # Create epochs from raw. Epoch creation sometimes returns an error; the file is
    # then skipped so a batch run can continue. `Exception` (rather than a bare
    # except) keeps KeyboardInterrupt/SystemExit working.
    try:
        epochs = mne.Epochs(raw, events, epodium.event_dictionary, -0.2, 0.8, preload=True, verbose=False)
    except Exception:
        print(f"Not all events in file {file} \n", end='')
        return

    # Low pass filter for high-frequency artifacts
    epochs.filter(None, 40, verbose=False)

    # Reject bad trials and repair bad sensors in EEG
    # autoreject.Ransac() is a quicker but less accurate method than AutoReject.
    ar = autoreject.AutoReject()
    epochs_clean = ar.fit_transform(epochs)

    # Save data and events. Creating the "events" sub-folder also creates
    # processed_directory itself when it does not exist yet.
    os.makedirs(events_directory, exist_ok=True)
    epochs_clean.save(path_epoch)
    np.savetxt(path_events, epochs_clean.events, fmt='%i')

    
    
## Multiprocessing (disabled draft, kept for reference)
# pool = Pool(processes=8)
# pool.map(functools.partial(file, method="autoreject"), sorted(glob.glob(os.path.join(local_paths.ePod_dataset, '*.bdf'))))

# Locations of the raw recordings and of the processed output, plus the dataset description.
dataset_directory = local_paths.ePod_dataset
dataset_info = epodium.EpodiumClass()
processed_directory = local_paths.ePod_processed

# Collect every raw recording with the dataset's file extension, in sorted order.
paths_raw = glob.glob(os.path.join(dataset_directory, "*" + dataset_info.file_extension))
paths_raw.sort()

# Process the recordings one at a time, printing progress for each file.
for raw_path in paths_raw:
    process_raw(
        raw_path,
        dataset_info,
        processed_directory,
        verbose=True,
    )

print("All files cleaned")

File 101a.bdf already cleaned 
File 101b.bdf already cleaned 
File 102a.bdf already cleaned 
File 102b.bdf already cleaned 
File 103a.bdf already cleaned 
File 103b.bdf already cleaned 
File 104a.bdf already cleaned 
File 104b.bdf already cleaned 
File 105a.bdf already cleaned 
File 105b.bdf already cleaned 
File 106a.bdf already cleaned 
File 106b.bdf already cleaned 
File 107a.bdf already cleaned 
File 107b (deel 1+2).bdf ignored 
File 107b (deel 3+4).bdf ignored 
File 108a.bdf already cleaned 
File 109a.bdf already cleaned 
File 109b.bdf already cleaned 
File 110a.bdf already cleaned 
File 110b.bdf already cleaned 
File 111a.bdf already cleaned 
File 111b.bdf already cleaned 
File 112a.bdf already cleaned 
File 112b.bdf already cleaned 
File 113a.bdf ignored 
File 113b.bdf ignored 
File 114a.bdf already cleaned 
File 114b.bdf already cleaned 
File 115a.bdf already cleaned 
File 115b.bdf already cleaned 
File 116a.bdf already cleaned 
File 116b.bdf already cleaned 
File 117a.bdf alre

AttributeError: 'EpodiumClass' object has no attribute 'group_events_12'

In [None]:
# TODO extern 

<br>

## Steps of ideal processing pipeline:

For a thorough explanation of ERPs and their processing, read the book *An Introduction to the Event-Related Potential Technique*, Second Edition, by Steven J. Luck.

#### Processing Pipeline

+ Prepare EEG 
1. Drop unused channels
2. Subtract reference (mastoids)
3. Detrend 
4. Filter
5. Remove bad channels

+ Segment EEG into standard and deviant epochs
1. subtract baseline
2. Reject artefacts
3. Average to get the evoked (ERP) for each subject, marker, and channel

+ Calculate Mismatch response 
1. deviant - standard for a single subject, for example GiepST_D - GiepST_S
2. check differences between channels and subjects

#### Analyse mismatch response 

Deviant minus standard ERP
+ Check between subjects to see if the subjects have similar responses
+ Check between channels to observe which parts of the brain are more influenced by the events


#### Extract features (Optional)
+ peak latency
+ peak amplitude
+ mean amplitude


#### Create labels


#### Input data into DL models

#### Visualise results of model predictions