#### Deep Learning

+ A model processes data and labels
+ The model optimizes through a training loop
+ New dummy data is generated for each iteration of the training loop
+ Able to use multiple models (standard is FNN)
+ Introduce Transformer model

TODO:
+ Show the confidence of a prediction (with softmax probability between 0 and 1) 
+ Compare multiple models on dummy data (ML, FNN, CNN, RNN, Transformer(Encoder))

#### Imports

In [6]:
import sklearn.model_selection as sk
import numpy as np   
import matplotlib.pyplot as plt
import os              
import sys
import random
import tensorflow as tf
import keras
from keras import layers
from IPython.display import clear_output

main_path = os.path.dirname(os.getcwd())
eegyolk_path = os.path.join(main_path, 'eegyolk')
sys.path.insert(0, eegyolk_path)
from eegyolk import dummy_data_functions as dummy
from eegyolk import display_helper as disp

#### Initialise Model

Feedforward neural network (FNN) 

In [2]:
# Feedforward baseline: flatten the 1024-value input, one hidden ReLU layer
# with dropout, and a single output unit without activation (a raw logit).
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(1024,)))
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(1))

# The output layer emits logits, so the loss is told to apply the sigmoid
# itself (from_logits=True).
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer='sgd',
)

#### Create batch of data

In [3]:
# Number of samples generated per call to create_batch in the cells below.
batch_size = 128

# Frequency distributions passed to dummy.generate_epoch when a batch is built:
# samples drawn from the "planck" distribution are labelled 1, samples drawn
# from the "constant" distribution are labelled 0 (see create_batch).
planck_distribution = dummy.generate_frequency_distribution("planck")
const_distribution = dummy.generate_frequency_distribution("constant")

def create_batch(batch_size):
    """Generate one labelled batch of dummy epochs.

    For every sample a uniform random draw decides which frequency
    distribution the epoch is generated from: draws below 0.5 use the
    constant distribution (label 0), all other draws use the Planck
    distribution (label 1).

    Args:
        batch_size: Number of epochs to generate.

    Returns:
        A tuple ``(X, Y)`` where ``X`` is a list with one generated epoch per
        sample and ``Y`` is a NumPy array of length ``batch_size`` holding
        the 0/1 labels.
    """
    epochs = []
    labels = np.zeros(batch_size)

    for index in range(batch_size):
        # Draw first, generate second: keeps the order of random calls fixed.
        is_planck = not (random.random() < 0.5)
        distribution = planck_distribution if is_planck else const_distribution
        epochs.append(dummy.generate_epoch(distribution))
        if is_planck:
            labels[index] = 1

    return epochs, labels

#### Training loop:

In [11]:
# Ten training rounds; every round trains and evaluates on freshly generated data.
for round_idx in range(10):
    # Train for four epochs on a new batch, in mini-batches of 10 samples.
    X_train, Y_train = create_batch(batch_size)
    train_inputs = np.array(X_train)
    model.fit(train_inputs, Y_train, batch_size=10, epochs=4)

    # Evaluate on a second, independent batch.
    X_test, Y_test = create_batch(batch_size)
    test_inputs = np.array(X_test)
    model.evaluate(test_inputs, Y_test, verbose=2)

    # Clear the cell output so the log of earlier rounds does not pile up.
    clear_output(wait=True)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
4/4 - 0s - loss: 0.5410 - precision_2: 0.6863 - binary_accuracy: 0.7422 - recall_2: 0.9859 - 49ms/epoch - 12ms/step


#### Transformer model:

Run the *training loop* cell again after this cell to train the transformer model instead of the feedforward NN.

The model is originally from:
+ Author: Bruce Shuyue Jia
+ Source: https://github.com/SuperBruceJia/EEG-DL/blob/master/Models/main-Transformer.py

In [10]:
  # Generate a fresh batch of dummy data; X_train / Y_train are not used in the
  # visible remainder of this cell.
  # NOTE(review): this statement is indented although it sits at the top level
  # of the cell, which looks unintentional - confirm and dedent.
  X_train, Y_train = create_batch(batch_size)

class TransformerBlock(layers.Layer):
    """Transformer encoder block: self-attention followed by a feed-forward net.

    The output of each sub-layer goes through dropout, is added to the
    sub-layer's input (residual connection) and is then layer-normalised.

    Args:
        embed_dim: Size of the last axis of the input. It is also used as the
            per-head key size of the attention layer and as the output size
            of the feed-forward network, whose result is added back onto the
            block's intermediate output.
        num_heads: Number of attention heads.
        ff_dim: Hidden size of the feed-forward network.
        rate: Dropout rate applied after both sub-layers.
        **kwargs: Forwarded to ``layers.Layer`` (for example ``name``).
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.5, **kwargs):
        super().__init__(**kwargs)
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential([
            layers.Dense(ff_dim, activation="relu"),
            layers.Dense(embed_dim),
        ])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        """Apply the block to ``inputs`` and return a tensor of the same shape.

        Args:
            inputs: Input tensor; it is used as both query and value of the
                self-attention layer.
            training: Forwarded to the dropout layers. Defaults to ``None`` so
                the layer can be called without the argument; Keras only
                supplies ``training`` when it has a concrete value, so a
                required positional parameter can fail with a
                missing-argument error.
        """
        # Self-attention sub-layer with residual connection.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)

        # Feed-forward sub-layer with residual connection.
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        out = self.layernorm2(out1 + ffn_output)
        return out


class TokenAndPositionEmbedding(layers.Layer):
    """Reshape a flat input into a sequence and add a learned position embedding.

    Args:
        maxlen: Number of time steps the flat input is reshaped into; also the
            number of positions in the embedding table.
        embed_dim: Number of features per time step; also the size of each
            position embedding vector.
        **kwargs: Forwarded to ``layers.Layer`` (for example ``name``).
    """

    def __init__(self, maxlen, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Keep the sizes on the instance so call() does not depend on
        # module-level variables that happen to share these names.
        self.maxlen = maxlen
        self.embed_dim = embed_dim
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Return ``x`` reshaped to ``(-1, maxlen, embed_dim)`` plus position embeddings."""
        positions = tf.range(start=0, limit=self.maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = tf.reshape(x, [-1, self.maxlen, self.embed_dim])
        # The (maxlen, embed_dim) embeddings broadcast over the leading batch axis.
        out = x + positions
        return out

# Hyperparameters. maxlen * embed_dim = 1024 values per flattened input.
maxlen = 16     # Number of time steps each flat input is reshaped into
embed_dim = 64  # Features of each time step
num_heads = 8   # Number of attention heads
ff_dim = 64     # Hidden layer size of the feed-forward network inside each transformer block

# Input time series: a flat vector that the embedding layer reshapes to
# (maxlen, embed_dim) before adding the position embedding.
inputs = layers.Input(shape=(maxlen * embed_dim,))
embedding_layer = TokenAndPositionEmbedding(maxlen, embed_dim)
x = embedding_layer(inputs)

# Encoder architecture: two stacked transformer blocks with identical settings.
block_kwargs = dict(embed_dim=embed_dim, num_heads=num_heads, ff_dim=ff_dim)
transformer_block_1 = TransformerBlock(**block_kwargs)
transformer_block_2 = TransformerBlock(**block_kwargs)
for block in (transformer_block_1, transformer_block_2):
    x = block(x)

# Output head: max-pool over the time axis, one hidden ReLU layer with dropout
# on both sides, and a single sigmoid unit that outputs a probability.
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(64, activation="relu")(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation="sigmoid")(x)

model = keras.Model(inputs=inputs, outputs=outputs)

# The model already applies the sigmoid, so plain binary cross-entropy is used.
adam = tf.keras.optimizers.Adam(learning_rate=1e-4)
eval_metrics = [
    tf.keras.metrics.Precision(),
    tf.keras.metrics.BinaryAccuracy(),
    tf.keras.metrics.Recall(),
]
model.compile(optimizer=adam, loss="binary_crossentropy", metrics=eval_metrics)