# Deep Learning Model Analysis

This notebook contains three sections which analyse the data and models trained in the *model_training.ipynb* notebook:

1. [Load data and model](#1ma)
2. [Deep Learning Model](#2ma)
3. [Make predictions on testing set](#3ma)

#### Import Packages
Note: This notebook may output tensorflow errors if cuda is not properly installed. The notebook still functions with these errors.

In [12]:
import mne
import numpy as np
import os
import glob
import matplotlib.pyplot as plt
import ipywidgets
import tensorflow as tf
import pandas as pd
from IPython.display import clear_output

# Local
import local_paths
from functions import epodium, processing, display_helper
from functions.epodium import Epodium
from functions.ddp import DDP
from functions.train_and_predict import EpodiumSequence, DDPSequence

# Models
from models.dl_4_tsc import encoder_model, fully_convolutional_model, resnet_model
from models.eeg_dl import transformer_model


---
<br>

<a id="1ma"></a>
## 1. Load data and model

####  Load dataset

In [16]:
def load_dataset(dataset_name, participant_index):   
    if dataset_name == "epodium":
        dataset = Epodium()
        epochs_directory = local_paths.ePod_epochs
        event_directory = local_paths.ePod_epochs_events
        labels = dataset.create_labels(local_paths.ePod_metadata)
        experiment_list = processing.valid_experiments(dataset, event_directory)

    elif dataset_name == "ddp":
        dataset = DDP()
        epochs_directory = local_paths.DDP_epochs
        event_directory = local_paths.DDP_epochs_events    
        directory_age_metadata = os.path.join(local_paths.DDP_metadata, "ages")
        labels = dataset.create_labels(local_paths.DDP_dataset, directory_age_metadata)
        experiment_list = processing.valid_experiments(dataset, event_directory)
        experiment_list = list(set(experiment_list)-set(dataset.wrong_channels_experiments))
        sequence = DDPSequence(experiment_list, labels, epochs_directory, 
                            gaussian_noise=1e-6, n_trials_averaged=30, 
                            batch_size=1, n_instances_per_experiment=1)
    
    print(f"{dataset_name} dataset loaded.")

    x, y = sequence.__getitem__(participant_index, True)

    print(f"The shape of one data instance is {x[0].shape}")
    display_helper.plot_array_as_evoked(x[0]*1e-6, dataset.channels_epod_ddp, frequency=500, n_trials=30)
    print(f"In this experiment the age of the participant is {int(y[0])} days.")
    print("Beware, the absolute value of the y-axis is meaningless due to data normalization. "
          "Each data-point is divided by the standard deviation of all the signals.")

dataset_name = ipywidgets.RadioButtons(options=["epodium", "ddp"], 
                                       description='Dataset:', 
                                       value="epodium")
index = ipywidgets.IntSlider(description="participant")

ui = ipywidgets.HBox([dataset_name, slider])
out = ipywidgets.interactive_output(load_dataset, {'dataset_name': dataset_name, "participant_index": index})
display(ui, out)

HBox(children=(RadioButtons(description='Dataset:', options=('epodium', 'ddp'), value='epodium'), IntSlider(va…

Output()

### Load a trained model
Choose from the trained models in the _local_paths.models_ folder.

In [3]:
# Required plot setting:
%matplotlib inline 

# history = []
                                       
def load_model(trained_model):
    base_path = os.path.join(local_paths.models, trained_model)
    path_history = os.path.join(base_path, "history.npy")
    path_model = os.path.join(base_path, "model")
    path_testset = os.path.join(base_path, "subsets", "test_set.txt")
    path_weights = os.path.join(base_path, "weights.h5")
    
    global model
    global testset
    global history

    # Load Model
    if(os.path.exists(path_model)):
        print(f"Loading Model: '{model_widget.value}'.")

        # Loads the entire model from a folder:
        model = tf.keras.models.load_model(path_model)
        model.load_weights(path_weights)
        # Reads the test-set of the trained model and puts the experiment names into a list:
        testset = open(path_testset, "r").read().split()
        # Loads the training history dictionary:
        history = np.load(path_history, allow_pickle=True).item()
        
        # Show Loss of Training History
        display_helper.show_plot(x=range(len(history['loss'][:])), y=history['loss'][:],  ylim=[0,600000], 
                                 xlabel="epochs", ylabel="validation loss", title="Loss during training")
        display_helper.show_plot(x=range(len(history['loss']))[:], y=history['val_loss'][:], ylim=[0,600000],
                                 xlabel="epochs", ylabel="validation loss", title="Validation loss during training")
        print(f"The lowest validation loss is {round(min(history['val_loss']), 3)}")

    else: 
        print("Model not found")

# Find all models in 'local_paths.models' 
trained_models = sorted(f for f in os.listdir(os.path.join(local_paths.models)) if not f.startswith("."))

# Widget for selecting the trained models
model_widget = ipywidgets.RadioButtons(options=trained_models, description='Models:')
display(model_widget)
out = ipywidgets.interactive_output(load_model, {'trained_model': model_widget})
display(out)

RadioButtons(description='Models:', options=('ddp_encoder_age_26ch_500hz', 'ddp_encoder_age_26ch_500hz_2', 'dd…

Output()

#### Visualise data instance

During processing, the epochs are chosen to be 1 second in which the event occurs at 0.2s.

In [4]:
random_participant_index = 0

temp_sequence = DDPSequence(testset, ddp_labels, epochs_directory, 
                            gaussian_noise=1e-6, n_trials_averaged=30, 
                            n_experiments_batch=1, n_instances_per_experiment=1)

x, y = temp_sequence.__getitem__(random_participant_index, True)

print(f"The shape of one data instance is {x[0].shape}")
display_helper.plot_array_as_evoked(x[0]*1e-6, dataset.channels_epod_ddp, frequency=500, n_trials=30)
print(f"In this experiment the age of the participant is {int(y[0])} days.")
print("Beware, the absolute value of the y-axis is meaningless due to data normalization. "
      "Each data-point is divided by the standard deviation of all the signals.")

NameError: name 'ddp_labels' is not defined

<br>

<a id='2ma'></a>
## 2. Make predictions on test set


In [None]:
n_passthroughs = 1
results = []

# For each experiment in the test-set
for i, experiment in enumerate(testset):
    test_sequence = DDPSequence([experiment], 
                                ddp_labels, 
                                epochs_directory, 
                                n_experiments_batch=1, 
                                n_instances_per_experiment=n_passthroughs)
    x, y = test_sequence.__getitem__(0)
    
    # Make a prediction with the model.
    real_pred = [y[0], np.squeeze(model.predict(x, verbose=0)).mean()]
    results.append(real_pred)
    
    print(f"{i+1}/{len(testset)} predicted.")
    clear_output(wait=True)
    
results = np.array(results)

In [None]:
display_helper.show_plot(np.array(results)[:,0], 
                         np.array(results)[:,1], 
                         f"Age prediction (encoder_age_128_20, passes: {n_passthroughs})", 
                         "Actual age (days)",
                         "Predicted age (days)",
                         scatter=True,
                         show=False)
plt.grid()
plt.plot([300, 1450], [300, 1450]) # Line where predicted=actual