# Applies Deep Learning to ePodium dataset for prediction of Dyslexia.

#### Import Packages

In [7]:
import mne
import numpy as np
import pandas as pd
import os
import glob
import matplotlib.pyplot as plt
import ipywidgets

import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import Precision, BinaryAccuracy, Recall
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

from functions import epodium
from models.dnn import fully_connected_model
from models.transformer import TransformerModel

import local_paths

#### Choose which processed data to use

In [8]:
# Radio buttons for picking which preprocessing pipeline's output to load.
processing_method_widget = ipywidgets.RadioButtons(
    description='processing:',
    options=['autoreject', 'ransac'],
)
display(processing_method_widget)

RadioButtons(description='processing:', options=('autoreject', 'ransac'), value='autoreject')

In [None]:
# Resolve the folder with processed data that matches the selected method.
chosen_processing = processing_method_widget.value
if chosen_processing == "autoreject":
    path_processed = local_paths.ePod_processed_autoreject
elif chosen_processing == "ransac":
    path_processed = local_paths.ePod_processed_ransac

<br>

## Preparing data as input to the deep learning models.

#### Check number of clean epochs* in each file after processing and split into train and test dataset

\*In the context of electroencephalography (EEG), *epochs* are EEG segments in which an event occurs. During processing, each epoch is chosen to be 1 second long, with the event occurring at 0.2 s. In the context of deep learning, *epochs* are iterations over the entire training dataset.

In [3]:
# Split the processed experiments into a train and a test set; the helper
# reports how many files were analysed and kept (see the cell output).
train, test = epodium.train_test_datasets()

Analyzed: 214, bad: 40
174 files have enough epochs for analysis.
The dataset is split up into 129 train and 45 test experiments


#### Create Iterator Sequence as input to feed the model
https://www.tensorflow.org/api_docs/python/tf/keras/utils/Sequence


In [4]:
# Wrap each split in an iterator that feeds batches to the model.
train_sequence = epodium.EvokedDataIterator(train)
test_sequence = epodium.EvokedDataIterator(test)

# Fetch the first training batch and display the shape of its inputs.
first_batch = train_sequence[0]
x, y = first_batch
x.shape

(96, 32, 512)

#### Choose Deep Learning model

In [None]:
# Radio buttons for picking the network architecture to train.
# The label reads 'model:' so it is not confused with the data-processing
# selector shown earlier in the notebook.
model_widget = ipywidgets.RadioButtons(
    options=['fully_connected', 'transformer'],
    description='model:',
)
display(model_widget)

#### Train model

The data is an *evoked* response (or *ERP*) from a participant in the ePodium experiment. 60 EEG signals were averaged over the window from -0.2 to +0.8 seconds relative to the onset of an event. This is done for each of the 12 event types separately.

__dimensions__: 
+ x (batches, channels, timesteps), e.g. (96, 32, 512) for the first training batch
+ y (batches, labels)

__labels__: 
+ (Sex, At risk of dyslexia, first standard, standard, deviant)


In [None]:
# Build and compile the model: Adam optimiser, binary cross-entropy loss,
# and precision / binary accuracy / recall as metrics.
# NOTE(review): the choice made in `model_widget` is not read here, so the
# transformer is always trained -- confirm whether that is intended.
initial_learning_rate = 1e-4
model = TransformerModel()
model.compile(optimizer=Adam(learning_rate=initial_learning_rate),
              loss=BinaryCrossentropy(),
              metrics=[Precision(), BinaryAccuracy(), Recall()])

# Checkpoints are written into the models directory set in local_paths.
output_filename = 'transformer_model'
output_file = os.path.join(local_paths.models, output_filename)

# Save only when the validation loss improves on the best value so far.
checkpointer = ModelCheckpoint(filepath=output_file + ".hdf5", monitor='val_loss',
                               verbose=1, save_best_only=True)

# NOTE(review): both patience values below exceed epochs=100, so neither
# early stopping nor the learning-rate reduction can trigger unless the
# number of epochs is raised -- confirm the intended training length.
earlystopper = EarlyStopping(monitor='val_loss', patience=1200, verbose=1)

# min_lr has to lie below the initial learning rate; when the two are equal
# the scheduler is never allowed to lower the rate at all.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=200,
                              min_lr=1e-6, verbose=1)

# NOTE(review): the test split doubles as validation data, so checkpoint
# selection is influenced by the test set -- consider a separate split.
history = model.fit(x=train_sequence,
                    validation_data=test_sequence,
                    epochs=100,
                    callbacks=[checkpointer, earlystopper, reduce_lr])

In [None]:
# Display the directory that the model checkpoint path is built from.
local_paths.models