# Train deep learning models to predict labels in the ePodium dataset

TODO: Add a more detailed explanation of this notebook.

#### Import Packages

In [1]:
# General-purpose, numerical and plotting packages.
import mne
import numpy as np
import os
import glob
import matplotlib.pyplot as plt
import pandas as pd

# TensorFlow / Keras pieces used to compile and fit the model
# (optimizer, losses, metrics and training callbacks).
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy, MeanSquaredError
from tensorflow.keras.metrics import Precision, BinaryAccuracy, Recall
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# Path configuration and helper modules (presumably project-local).
import local_paths
from functions import processing, display_helper, data_io
from functions.epodium import Epodium
# Shared Epodium instance; later cells call its methods
# (split_train_test_datasets, EvokedDataIterator, plot_array_as_evoked).
epodium = Epodium()

# Model constructors from the `models` package; the training cell below
# uses `encoder`.
from models import transformer
from models.dnn import fully_connected_model
from models.hfawaz import cnn, encoder

<br>



---
<a id='1'></a>
## 1. Prepare Dataset

__input dimensions__: 
+ x (batches, timesteps, channels)
+ y (batches, labels)

__labels__: 
+ Binary: Sex, At risk of dyslexia, Group a/b
+ Regression: Age, Vocabulary



In [2]:
# Select the dataset to train on. Only "epodium" has loading code in this
# cell; "ddp" is noted as an alternative but is not wired up here.
dataset_name = "epodium" # "ddp"

if dataset_name == "epodium":
    dataset = Epodium()
    # Folders with the processed EEG epochs and their event files.
    epochs_directory = local_paths.ePod_epochs
    event_directory = local_paths.ePod_epochs_events

    # Metadata tables returned by data_io.load_metadata, unpacked in the
    # order children, CDI, parents, codes.
    epod_children, epod_cdi, epod_parents, epod_codes = \
        data_io.load_metadata(local_paths.ePod_metadata, epodium.metadata_filenames)
else:
    # Fail here instead of with a NameError (or stale kernel state) in a
    # later cell when `dataset` / `event_directory` would be undefined.
    raise ValueError(
        f"Unsupported dataset_name: '{dataset_name}'. "
        "Only 'epodium' is implemented in this notebook."
    )

# TODO OUTPUT LABELS metadata

#### Split processed epochs\* into train and test sequences.

\*In the context of electroencephalography (EEG), *epochs* are EEG segments in which an event occurs. During processing, each epoch is chosen to be 1 second long, with the event occurring at 0.2 s. In the context of deep learning, *epochs* are iterations over the entire training dataset.

First, choose which processed data to use.

In [3]:
# Keep only the experiments that have at least the requested number of
# standard and deviant epochs.
experiment_list = processing.valid_experiments(
    dataset,
    event_directory,
    min_standards=180,
    min_deviants=80,
)

# Divide the valid experiments into a training and a test set.
# NOTE(review): the recorded output of this cell ends with
# "TypeError: 'Epodium' object is not iterable", presumably raised by this
# call. Verify how split_train_test_datasets is declared in the Epodium class
# (e.g. a missing `self` parameter or @staticmethod) before relying on it.
train, test = epodium.split_train_test_datasets(experiment_list)

Analyzed: 229, bad: 43
186 experiments have enough epochs for analysis.


TypeError: 'Epodium' object is not iterable

#### Preparing data iterator (Sequence) as input to the deep learning models.
https://www.tensorflow.org/api_docs/python/tf/keras/utils/Sequence

In [None]:
# Build the iterators that feed the model during training and validation.
# Only the training iterator gets `gaussian_noise` (presumably noise
# augmentation; check EvokedDataIterator for the exact meaning).
# TODO(review): an empty slot after the first argument (`train, , ...`) made
# this cell a SyntaxError; it has been removed so the cell parses. Confirm
# whether EvokedDataIterator needs a second positional argument (e.g. the
# label metadata) and pass it here if so.
train_sequence = epodium.EvokedDataIterator(train, gaussian_noise=1e-6)
test_sequence = epodium.EvokedDataIterator(test)

#### Visualise data instance

In [None]:
# Fetch one batch (number 6) from the test iterator and report the shape of
# a single data instance.
x, y = test_sequence[6]
print(f"The shape of one data instance is {x[0].shape}")

# Plot one instance of the batch in two halves: the first 32 rows and the
# remaining rows (presumably two groups of channels; confirm the axis order).
index = 15 # 0 to 63
instance = x[index]
epodium.plot_array_as_evoked(instance[:32], frequency=128)
epodium.plot_array_as_evoked(instance[32:], frequency=128)

<br>

## Deep Learning

The data is an *evoked* response (*ERP*) from a participant in the ePodium experiment. 60 EEG signals were averaged from -0.2 to +0.8 seconds relative to the onset of an event. This is done for each of the 12 event types separately.




#### Train model

In [None]:
# Model configuration. `model_name` determines the folder in which all
# artefacts of this run (weights, history, test set, saved model) are stored.
model_name = "encoder_age_128_3"
model = encoder((64,128), 1)
epochs = 300
learning_rate = 1e-5

# Paths to save model info
base_path = os.path.join(local_paths.models, model_name)

path_history = os.path.join(base_path, "history.npy")
path_model = os.path.join(base_path, "model")
path_testset = os.path.join(base_path, "testset.txt")
path_weights = os.path.join(base_path, "weights.h5")

if os.path.exists(path_model):
    # Never overwrite a previously saved model; nothing is trained in this case.
    print(f"Model: '{model_name}' already exist. Delete the existing model first or rename this model.")
else:
    print(f"Create model: {model_name}")
    # makedirs also creates missing parent folders (e.g. when the models
    # folder itself does not exist yet) and does not fail if the run folder
    # was left behind by an earlier, interrupted run.
    os.makedirs(base_path, exist_ok=True)

    # Save validation-set for future testing
    with open(path_testset, 'w') as f:
        for participant in test:
            f.write(participant + '\n')

    # Mean squared error loss: the model is trained as a regressor.
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss=MeanSquaredError()) # , metrics=[Precision(), BinaryAccuracy(), Recall()]

    # Fit model
    # Keep only the weights of the epoch with the lowest validation loss.
    checkpointer = ModelCheckpoint(filepath=path_weights, monitor='val_loss', verbose=1, save_weights_only=True, save_best_only=True)
    # Not active: add `reduce_lr` to the callbacks list below to enable it.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=10, factor=0.7, verbose=1)
    history = model.fit(x=train_sequence, validation_data=test_sequence, epochs=epochs, callbacks=[checkpointer])

    # history.history is a dict, so it is stored as a pickled object array;
    # reload it with np.load(path_history, allow_pickle=True).item().
    np.save(path_history, history.history)
    model.save(path_model)

#### Show Training History

In [None]:
# Plot the recorded training loss and validation loss per training epoch,
# one figure each. The first figure shows the *training* loss, so its y-axis
# is labelled accordingly.
epoch_axis = range(len(history.history['loss']))
display_helper.show_plot(x=epoch_axis, y=history.history['loss'], xlabel="epochs", ylabel="training loss", title=f"Loss during training ({model_name})")
display_helper.show_plot(x=epoch_axis, y=history.history['val_loss'], xlabel="epochs", ylabel="validation loss", title=f"Validation loss during training ({model_name})")