**To-do List**


*   Current model
  *   Issue with *class_weight* when fitting the model
  * LSTM parameters to choose
  * Add regularisation terms
*   RNN-GP
  * Obtain the dense layers from the model
  * Use them as features into a GP
  *  Derive metrics (Accuracy) from GPFlow output
  *  Multi-output GP (base class and class)




Utilities

In [None]:
import tensorflow as tf
import numpy as np
from keras.layers import  Dense, Flatten, Activation, Dropout, Embedding, Conv1D, Conv2D, MaxPooling2D, MaxPooling1D, Concatenate, BatchNormalization, GaussianNoise
from keras.layers import LSTM, TimeDistributed, Permute, Reshape, Lambda, RepeatVector, merge, Input, Multiply, SimpleRNN, GRU, LeakyReLU
from keras.utils import np_utils
from keras.layers.wrappers import  Bidirectional
import h5py as h5
import matplotlib.pyplot as plt
from keras.models import Model, Sequential
from keras_self_attention import SeqSelfAttention, SeqWeightedAttention

In [None]:
pip install keras_self_attention

Collecting keras_self_attention
  Downloading https://files.pythonhosted.org/packages/c3/34/e21dc6adcdab2be03781bde78c6c5d2b2136d35a1dd3e692d7e160ba062a/keras-self-attention-0.49.0.tar.gz
Building wheels for collected packages: keras-self-attention
  Building wheel for keras-self-attention (setup.py) ... [?25l[?25hdone
  Created wheel for keras-self-attention: filename=keras_self_attention-0.49.0-cp37-none-any.whl size=19468 sha256=21ecd3da96554ed24adbea575196a37ad7ee42b4a022541d0b7a6ce4e4b7ee34
  Stored in directory: /root/.cache/pip/wheels/6f/9d/c5/26693a5092d9313daeae94db04818fc0a2b7a48ea381989f34
Successfully built keras-self-attention
Installing collected packages: keras-self-attention
Successfully installed keras-self-attention-0.49.0


Train and Test Data

In [None]:
INPUT_DIM = 8      # number of features per time step (last axis of the model input)
TIME_STEPS = 500   # sequence length fed to the recurrent layers

# The two HDF5 handles are not closed here; later cells read
# hf_Train['Label'] and hf_Test['Label'] again.
hf_Train = h5.File('/content/sample_data/Fold_10_Train_Data_500.h5', 'r')
hf_Test = h5.File('/content/sample_data/Fold_10_Test_Data_500.h5', 'r')

# Materialise the training samples and labels as in-memory NumPy arrays.
X_train = np.array(hf_Train['Train_Data'])
Y_train = np.array(hf_Train['Label'])

# The test file stores its samples under the same 'Train_Data' key.
X_test = np.array(hf_Test['Train_Data'])
Y_test = np.array(hf_Test['Label'])

# One-hot encode the train and test labels over the 13 classes.
Y_train = np_utils.to_categorical(Y_train, 13)
Y_test = np_utils.to_categorical(Y_test, 13)

In [None]:
# Display the shape of the training sample array.
X_train.shape

(5688, 500, 8)

Attention Mechanism


*   Could be replaced with 
   * *from keras_self_attention import SeqSelfAttention, SeqWeightedAttention*
   * *model.add(SeqWeightedAttention())*
* What about multi-head attention?




In [None]:
def attention_3d_block(inputs):
    """Scale a 3-D sequence tensor element-wise by learned softmax weights.

    The input is transposed so that a Dense(TIME_STEPS, softmax) layer acts
    along the time axis, the result is transposed back, and the weights are
    multiplied element-wise with the original input.

    Args:
        inputs: 3-D Keras tensor; its second axis is expected to have
            length TIME_STEPS (the module-level constant).

    Returns:
        Keras tensor produced by multiplying ``inputs`` with the weights.
    """
    n_features = int(inputs.shape[2])

    # Swap the last two axes so the Dense layer below works over time steps.
    transposed = Permute((2, 1))(inputs)
    # Shape-preserving reshape: it only documents the axis layout, but it is
    # kept because it is a layer of the graph (layer indices depend on it).
    transposed = Reshape((n_features, TIME_STEPS))(transposed)

    time_softmax = Dense(
        TIME_STEPS,
        activation='softmax',
        kernel_initializer='RandomNormal',
        bias_initializer='zeros',
    )
    weights = time_softmax(transposed)

    # Transpose back to the input layout; the layer is named so the
    # attention weights can be looked up by name.
    a_probs = Permute((2, 1), name='attention_vec')(weights)
    return Multiply()([inputs, a_probs])

LSTM

In [None]:
def model_attention_applied_after_lstm():
    """Build the bidirectional-LSTM classifier with attention after the LSTM.

    Architecture: Input(TIME_STEPS, INPUT_DIM) -> Bidirectional LSTM (128
    units, sequences returned) -> attention_3d_block -> Flatten ->
    Dense(128) + Dropout -> Dense(64) + Dropout -> 13-way softmax.

    Returns:
        The uncompiled Keras ``Model``.
    """
    inputs = Input(shape=(TIME_STEPS, INPUT_DIM,))

    recurrent = LSTM(
        128,
        return_sequences=True,
        kernel_initializer='RandomNormal',
        dropout=0.3,
        recurrent_dropout=0.3,
        recurrent_initializer='RandomNormal',
        bias_initializer='zero',
    )
    features = Bidirectional(recurrent)(inputs)

    # Weight the LSTM outputs, then flatten them for the dense head.
    features = attention_3d_block(features)
    features = Flatten()(features)

    # Two ReLU dense layers, each followed by 40% dropout.
    for width in (128, 64):
        features = Dense(
            width,
            kernel_initializer='RandomNormal',
            bias_initializer='zeros',
            activation='relu',
        )(features)
        features = Dropout(0.4)(features)

    output = Dense(13, activation='softmax')(features)
    return Model([inputs], output)

CNN

In [None]:
def model_with_cnn(input_shape):
    """Build the 1-D convolutional classifier as a Keras ``Sequential`` model.

    Five Conv1D layers (128, 128, 128, 256, 256 filters, kernel size 3,
    'same' padding), each followed by LeakyReLU, arranged in three stages.
    Every stage ends with MaxPooling1D(3), BatchNormalization and
    GaussianNoise(0.05). The head is Flatten, Dense(128) and Dense(64) with
    ReLU and 20% dropout, and a 13-way softmax.

    Args:
        input_shape: Shape of one sample without the batch axis; it is
            passed to the first Conv1D layer.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    dense_options = dict(
        kernel_initializer='RandomNormal',
        bias_initializer='zeros',
        activation='relu',
    )
    model = Sequential()

    # Stage 1: a single convolution that also declares the input shape.
    model.add(Conv1D(128, 3, padding='same', input_shape=input_shape))
    model.add(LeakyReLU())
    model.add(MaxPooling1D(3))
    model.add(BatchNormalization())
    model.add(GaussianNoise(0.05))

    # Stages 2 and 3: two convolutions each, with 128 and then 256 filters.
    for n_filters in (128, 256):
        for _ in range(2):
            model.add(Conv1D(n_filters, 3, padding='same'))
            model.add(LeakyReLU())
        model.add(MaxPooling1D(3))
        model.add(BatchNormalization())
        model.add(GaussianNoise(0.05))

    # Dense head.
    model.add(Flatten())
    for n_units in (128, 64):
        model.add(Dense(n_units, **dense_options))
        model.add(Dropout(0.2))
    model.add(Dense(13, activation='softmax'))
    return model

The improved architecture is composed of 5 CNN layers interleaved with batch normalization, Leaky ReLU activations, and max-pooling. Gaussian noise is added after every 2 CNN layers to reduce overfitting, and dropout at a rate of 20% is added after the last CNN layer.

The network is completed with two dense layers of 128 and 64 units, respectively, to reduce the input dimension, and a final softmax layer for the output class. AMSGrad optimization with a learning rate of 0.0005 was adopted for the learning step.

In [None]:
def model_with_rnn(input_shape):
    """Build the hybrid classifier: a bidirectional-GRU branch plus a CNN branch.

    Both branches read the same input tensor. Each branch ends in a 256-unit
    dense projection; the two projections are concatenated and passed
    through Dense(128) and Dense(64) (LeakyReLU, 30% dropout) to a 13-way
    softmax.

    Args:
        input_shape: Shape of a single sample without the batch axis, e.g.
            ``X_train[0].shape``.

    Returns:
        The uncompiled Keras ``Model``.
    """
    # Size the input from the argument. Previously the Input was built from
    # the TIME_STEPS / INPUT_DIM globals and ``input_shape`` was handed only
    # to the first Conv1D, which is called on an existing tensor, so the
    # parameter did not control the model input.
    inputs = Input(shape=tuple(input_shape))

    # RNN part: two stacked bidirectional GRUs that return full sequences.
    gru_units = 128
    gru_options = dict(
        return_sequences=True,
        kernel_initializer='RandomNormal',
        dropout=0.5,
        recurrent_dropout=0.5,
        recurrent_initializer='RandomNormal',
        bias_initializer='zero',
    )
    gru_one = Bidirectional(GRU(gru_units, **gru_options))(inputs)
    gru_two = Bidirectional(GRU(gru_units, **gru_options))(gru_one)
    gru_two = GaussianNoise(0.05)(gru_two)
    # Attention layer from keras_self_attention applied to the GRU sequence.
    attention = SeqWeightedAttention()(gru_two)
    attention = Flatten()(attention)
    # Note: this projection has no activation, unlike the CNN branch below.
    rnnoutput = Dense(256, kernel_initializer='RandomNormal', bias_initializer='zeros')(attention)
    rnnoutput = Dropout(0.3)(rnnoutput)

    # CNN part: three conv stages, each closed by pooling, batch norm and noise.
    conv1 = Conv1D(64, 3, padding='same')(inputs)
    conv1 = LeakyReLU()(conv1)
    max1 = MaxPooling1D(3)(conv1)
    max1 = BatchNormalization()(max1)
    max1 = GaussianNoise(0.05)(max1)
    conv2 = Conv1D(128, 3, padding='same')(max1)
    conv2 = LeakyReLU()(conv2)
    conv3 = Conv1D(128, 3, padding='same')(conv2)
    conv3 = LeakyReLU()(conv3)
    max2 = MaxPooling1D(3)(conv3)
    max2 = BatchNormalization()(max2)
    max2 = GaussianNoise(0.05)(max2)
    conv4 = Conv1D(256, 3, padding='same')(max2)
    conv4 = LeakyReLU()(conv4)
    conv5 = Conv1D(256, 3, padding='same')(conv4)
    conv5 = LeakyReLU()(conv5)
    max3 = MaxPooling1D(3)(conv5)
    max3 = BatchNormalization()(max3)
    max3 = GaussianNoise(0.05)(max3)
    max3 = Flatten()(max3)
    cnnoutput = Dense(256, kernel_initializer='RandomNormal', bias_initializer='zeros')(max3)
    cnnoutput = LeakyReLU()(cnnoutput)
    cnnoutput = Dropout(0.3)(cnnoutput)

    # Concatenated output. Uses the Concatenate layer imported from
    # keras.layers, like every other layer in this function.
    jointoutput = Concatenate()([rnnoutput, cnnoutput])

    # Dense feed-forward head.
    dense_one = Dense(128, kernel_initializer='RandomNormal', bias_initializer='zeros')(jointoutput)
    dense_one = LeakyReLU()(dense_one)
    dense_one = Dropout(0.3)(dense_one)
    dense_two = Dense(64, kernel_initializer='RandomNormal', bias_initializer='zeros')(dense_one)
    dense_two = LeakyReLU()(dense_two)
    dense_two = Dropout(0.3)(dense_two)

    # Output
    output = Dense(13, activation='softmax')(dense_two)
    model = Model([inputs], output)
    return model
    
    # model = Sequential([
    #     Bidirectional(GRU(128, return_sequences = True, dropout= 0.5, recurrent_dropout = 0.5, activation = 'relu', input_shape = input_shape)),
    #     Bidirectional(GRU(128, return_sequences = True, dropout = 0.5, recurrent_dropout = 0.5, activation = 'relu')),
    #     Bidirectional(GRU(64, dropout = 0.5, recurrent_dropout = 0.5, activation = 'relu')),
    #     SeqWeightedAttention(),
    #     Flatten(),
    #     Dense(128, kernel_initializer='RandomNormal', bias_initializer='zeros', activation='relu'),
    #     Dropout(0.5),
    #     Dense(64, kernel_initializer='RandomNormal', bias_initializer='zeros', activation='relu'),
    #     Dropout(0.5),
    #     Dense(13, activation='softmax')
    # ])
    # return model

Deep RNN with Attention


*   GRU instead of LSTM



In [None]:
def model_with_pure_rnn(input_shape):
    """Build the GRU-only classifier (the recurrent branch without the CNN).

    Two stacked bidirectional GRUs (128 units each, sequences returned) feed
    an attention layer and a 256-unit dense projection, followed by
    Dense(128) and Dense(64) (LeakyReLU, 30% dropout) and a 13-way softmax.

    Args:
        input_shape: Shape of a single sample without the batch axis, e.g.
            ``X_train[0].shape``.

    Returns:
        The uncompiled Keras ``Model``.
    """
    # Size the input from the argument. Previously ``input_shape`` was
    # accepted but never used; the Input was built from the TIME_STEPS /
    # INPUT_DIM globals instead.
    inputs = Input(shape=tuple(input_shape))

    # RNN part
    gru_units = 128
    gru_options = dict(
        return_sequences=True,
        kernel_initializer='RandomNormal',
        dropout=0.5,
        recurrent_dropout=0.5,
        recurrent_initializer='RandomNormal',
        bias_initializer='zero',
    )
    gru_one = Bidirectional(GRU(gru_units, **gru_options))(inputs)
    gru_two = Bidirectional(GRU(gru_units, **gru_options))(gru_one)
    gru_two = GaussianNoise(0.05)(gru_two)
    # Attention layer from keras_self_attention applied to the GRU sequence.
    attention = SeqWeightedAttention()(gru_two)
    attention = Flatten()(attention)
    # Linear projection (no activation), followed by dropout.
    rnnoutput = Dense(256, kernel_initializer='RandomNormal', bias_initializer='zeros')(attention)
    rnnoutput = Dropout(0.3)(rnnoutput)

    # Dense feed-forward head.
    dense_one = Dense(128, kernel_initializer='RandomNormal', bias_initializer='zeros')(rnnoutput)
    dense_one = LeakyReLU()(dense_one)
    dense_one = Dropout(0.3)(dense_one)
    dense_two = Dense(64, kernel_initializer='RandomNormal', bias_initializer='zeros')(dense_one)
    dense_two = LeakyReLU()(dense_two)
    dense_two = Dropout(0.3)(dense_two)

    # Output
    output = Dense(13, activation='softmax')(dense_two)
    model = Model([inputs], output)
    return model



In [None]:
# Sum of the per-class means of the one-hot training labels.
# NOTE(review): the saved output below is a 13-element vector, which looks
# like the result of np.mean(Y_train, axis=0) without the outer sum() -
# confirm which expression produced it.
sum(np.mean(Y_train, axis=0))

array([0.07911392, 0.07911392, 0.07911392, 0.07911392, 0.07911392,
       0.07911392, 0.07911392, 0.07911392, 0.07911392, 0.07911392,
       0.05063291, 0.07911392, 0.07911392], dtype=float32)

Model Fit

In [None]:
# Build, compile, train (50 epochs) and save the BiLSTM + attention model.
# The test fold is passed as validation data, so the per-epoch validation
# metrics are computed on the test set.
m = model_attention_applied_after_lstm()
m.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
history = m.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    epochs=50,
    batch_size=128,
    class_weight=None,
)
m.save("Tenth_Fold_New_Model_500_8")  # directory reloaded later by load_model

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
INFO:tensorflow:Assets written to: Tenth_Fold_New_Model_500_8/assets


In [None]:
# Build, compile, train (50 epochs) and save the CNN model.
input_shape = X_train.shape[1:]  # per-sample shape, batch axis dropped
mCNN = model_with_cnn(input_shape)
mCNN.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
historyCNN = mCNN.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    epochs=50,
    batch_size=128,
    class_weight=None,
)
mCNN.save("CNN_Tenth_Fold_New_Model_500_8")  # directory reloaded later by load_model

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
INFO:tensorflow:Assets written to: CNN_Tenth_Fold_New_Model_500_8/assets


In [None]:
# Build, compile, train (50 epochs) and save the hybrid GRU + CNN model.
input_shape = X_train.shape[1:]  # per-sample shape, batch axis dropped
mRNN = model_with_rnn(input_shape)
mRNN.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
historyRNN = mRNN.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    epochs=50,
    batch_size=128,
    class_weight=None,
)
mRNN.save("RNN_Tenth_Fold_New_Model_500_8")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50

In [None]:
# Build, compile, train (50 epochs) and save the GRU-only model.
input_shape = X_train.shape[1:]  # per-sample shape, batch axis dropped
mRNNpure = model_with_pure_rnn(input_shape)
mRNNpure.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# NOTE(review): this rebinds historyRNN, so the training history of the
# hybrid mRNN model is no longer reachable under that name. Consider a
# separate name once every reader of historyRNN has been checked.
historyRNN = mRNNpure.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    epochs=50,
    batch_size=128,
    class_weight=None,
)
mRNNpure.save("Pure_RNN_Tenth_Fold_New_Model_500_8")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50

Accessing Layers

In [None]:
# Reload the attention-LSTM and CNN models from the directories written by
# the m.save(...) / mCNN.save(...) calls in the training cells.
# NOTE(review): the saved output of this cell is an OSError, so the
# directories were presumably missing in that run - retrain or re-upload first.
m = tf.keras.models.load_model("Tenth_Fold_New_Model_500_8")
mCNN = tf.keras.models.load_model("CNN_Tenth_Fold_New_Model_500_8")

OSError: ignored

In [None]:
# Output shape of layer 11 of the attention model; this layer is used
# further down as the feature output for the GP.
m.layers[11].output.shape

TensorShape([None, 64])

In [None]:
# Print the layer-by-layer summary of the CNN model.
# NOTE(review): the saved summary below starts with a 64-filter Conv1D,
# whereas model_with_cnn starts with 128 filters, so the output seems to
# come from a different model version - re-check the layer index used for
# feature extraction after retraining.
mCNN.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_46 (Conv1D)           (None, 500, 64)           1600      
_________________________________________________________________
max_pooling1d_24 (MaxPooling (None, 166, 64)           0         
_________________________________________________________________
conv1d_47 (Conv1D)           (None, 166, 128)          24704     
_________________________________________________________________
conv1d_48 (Conv1D)           (None, 166, 128)          49280     
_________________________________________________________________
max_pooling1d_25 (MaxPooling (None, 55, 128)           0         
_________________________________________________________________
conv1d_49 (Conv1D)           (None, 55, 256)           98560     
_________________________________________________________________
conv1d_50 (Conv1D)           (None, 55, 256)         

In [None]:
# Output shape of layer 12 of the CNN model; this layer is used further
# down as the feature output for the GP.
mCNN.layers[12].output.shape

TensorShape([None, 64])

In [None]:
# Flatten every sample to one row (all remaining axes merged) and convert
# the result to a float64 NumPy array.
X_train_temp = np.array(
    tf.reshape(X_train, [X_train.shape[0], -1]),
    dtype="float64",
)
X_test_temp = np.array(
    tf.reshape(X_test, [X_test.shape[0], -1]),
    dtype="float64",
)

In [None]:
# Display the shape of the flattened test array.
X_test_temp.shape

(632, 4000)

In [None]:
# Truncated copies of the trained networks, used as feature extractors.
# They share the original input and stop at a hard-coded layer index
# (12 for the CNN, 11 for the attention model). These indices must be
# re-checked whenever either architecture changes.
new_temp_model_cnn = Model(inputs=mCNN.input, outputs=mCNN.layers[12].output)
new_temp_model_attention = Model(inputs=m.input, outputs=m.layers[11].output)

Creating Auxiliary Dataset

In [None]:
# Build the label arrays for the Gaussian-process stage. The labels are
# read again from the HDF5 files (as float64) because Y_train / Y_test were
# overwritten with one-hot encodings when the data was loaded.
Y_train_new = np.array(hf_Train['Label'], dtype='float64')
Y_test_new = np.array(hf_Test['Label'], dtype='float64')

In [None]:
# Features from the truncated CNN model for the train and test sets,
# stored as float64 arrays.
X_train_cnn = np.array(new_temp_model_cnn.predict(X_train), dtype='float64')
X_test_cnn = np.array(new_temp_model_cnn.predict(X_test), dtype='float64')

In [None]:
# Display the shape of the CNN feature matrix.
X_train_cnn.shape

(5688, 64)

In [None]:
# Features from the truncated attention model for the train and test sets,
# stored as float64 arrays.
X_train_attention = np.array(new_temp_model_attention.predict(X_train), dtype='float64')
X_test_attention = np.array(new_temp_model_attention.predict(X_test), dtype='float64')

In [None]:
# Display the shape of the attention feature matrix.
X_train_attention.shape

(5688, 64)

In [None]:
# Join the CNN and attention features column-wise (CNN columns first) to
# form the inputs of the Gaussian process.
X_train_new = np.concatenate([X_train_cnn, X_train_attention], axis=1)
X_test_new = np.concatenate([X_test_cnn, X_test_attention], axis=1)

In [None]:
# Display the shape of the combined feature matrix.
X_train_new.shape

(5688, 128)

Introducing Gaussian Process

In [None]:
import gpflow
import numpy as np

import matplotlib.pyplot as plt
import tensorflow as tf

import gpflow
from gpflow.utilities import ops, print_summary, set_trainable
from gpflow.config import set_default_float, default_float, set_default_summary_fmt
from gpflow.ci_utils import ci_niter

import warnings
# Silences every warning category for the rest of the session, not only
# DeprecationWarnings from tensorflow.
warnings.filterwarnings("ignore")

%matplotlib inline

# Use float64 as GPflow's default float type.
set_default_float('float64')

#from tensorflow2_work.multiclass_classification import plot_posterior_predictions, colors

# reproducibility: fix the NumPy and TensorFlow random seeds
np.random.seed(0)
tf.random.set_seed(123)

# (inputs, labels) tuple later passed to mGP.training_loss_closure.
data = (X_train_new, Y_train_new)

Declaring the SVGP model

In [None]:
# sum kernel: Matern32 + White
lengthscales = tf.convert_to_tensor([1.0] * X_train_new.shape[1], dtype=default_float())
kernel = gpflow.kernels.Matern32(lengthscales=lengthscales) #+ gpflow.kernels.White(variance=0.01)

# Robustmax Multiclass Likelihood
invlink = gpflow.likelihoods.RobustMax(13)  # Robustmax inverse link function
likelihood = gpflow.likelihoods.MultiClass(13, invlink=invlink)  # Multiclass likelihood
M = 80  # Number of inducing locations
Z = X_train_new[::M].copy()  # inducing inputs CHECK DIMENSIONS

mGP = gpflow.models.SVGP(
    kernel=kernel,
    likelihood=likelihood,
    inducing_variable=Z,
    num_latent_gps=13,
    whiten=True,
    q_diag=False,
)

# Only train the variational parameters
#set_trainable(mGP.kernel.kernels[1].variance, False)
#set_trainable(mGP.inducing_variable, False)
print_summary(mGP, fmt="notebook")

name,class,transform,prior,trainable,shape,dtype,value
SVGP.kernel.variance,Parameter,Softplus,,True,(),float64,1.0
SVGP.kernel.lengthscales,Parameter,Softplus,,True,"(128,)",float64,"[1., 1., 1...."
SVGP.likelihood.invlink.epsilon,Parameter,Sigmoid,Beta,False,(),float64,0.0010000000000000005
SVGP.inducing_variable.Z,Parameter,Identity,,True,"(72, 128)",float64,"[[0.00000000e+00, 1.10954475e+01, 0.00000000e+00..."
SVGP.q_mu,Parameter,Identity,,True,"(72, 13)",float64,"[[0., 0., 0...."
SVGP.q_sqrt,Parameter,FillTriangular,,True,"(13, 72, 72)",float64,"[[[1., 0., 0...."


Running Inference

In [None]:
# Minimise the SVGP training loss on `data` with GPflow's Scipy optimiser.
# The iteration cap is passed through ci_niter.
opt = gpflow.optimizers.Scipy()

opt_logs = opt.minimize(
    closure=mGP.training_loss_closure(data),
    variables=mGP.trainable_variables,
    options={"maxiter": ci_niter(10000)},
)
print_summary(mGP, fmt="notebook")

NameError: ignored

In [None]:
# Predictive mean and variance of the GP outputs for the test features.
(Y_test_GP_mean, Y_test_GP_variance) = mGP.predict_y(X_test_new)

NameError: ignored

In [None]:
# Test accuracy of the GP: the predicted class of a sample is the index of
# the largest entry in its row of the predictive mean.
gp_mean_rows = Y_test_GP_mean.numpy()  # converted once instead of per sample
Y_test_pred = [np.argmax(row) for row in gp_mean_rows]

# A zero difference means the predicted class equals the true label.
diffs = Y_test_pred - Y_test_new
n_correct = len([delta for delta in diffs if delta == 0])
test_acc = n_correct / X_test_new.shape[0]
print(test_acc)

0.8306962025316456


Plot