## Load/import packages

In [2]:
import json
import scipy
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow.keras.backend as K
import kerastuner as kt

from tensorflow.keras import Sequential, layers
from tensorflow.keras.layers import Layer
from tensorflow.keras.losses import CategoricalCrossentropy
from sklearn.utils import class_weight

from os import listdir
from os.path import join, splitext, normpath

# Import modules to run custom FW-RNN cell
from tensorflow.python.keras.layers.recurrent import (
    _generate_zero_filled_state_for_cell,
    _generate_zero_filled_state,
    ops,
    tensor_shape,
    activations,
    initializers,
    regularizers,
    nest,
    array_ops,
)

# Import variables and functions from my own scripts
from functions import plot_history, arr_replacevalue
from load_features import (
    train_features_AW2,
    val_features_AW2,
    train_labels_AW2,
    val_labels_AW2,
    labels_reshaper,
    features_reshaper,
)

%matplotlib inline

# Limit GPU memory usage
for device in tf.config.experimental.list_physical_devices("GPU"):
    tf.config.experimental.set_memory_growth(device, True)

# Prepare data

In [3]:
# Reshape data to specified sequence length
length = 120
seq_train_features = features_reshaper(train_features_AW2, length)
seq_val_features = features_reshaper(val_features_AW2, length)

seq_train_labels = labels_reshaper(train_labels_AW2, length)
seq_val_labels = labels_reshaper(val_labels_AW2, length)

In [5]:
def comp_sampleweights(labels):
    # Convert one-hot encoded labels back to label integers
    train_label_ints = np.argmax(labels, axis=2)

    # Compute class weights with sklearn
    class_weights = class_weight.compute_class_weight(
        "balanced", np.unique(train_label_ints), train_label_ints.flatten()
    )
    d_class_weights = dict(enumerate(class_weights))

    # Pass a 2D array with shape (samples, sequence_length), to apply a different weight to every timestep of every sample
    return arr_replacevalue(train_label_ints, d_class_weights)
    
train_samples_weights = comp_sampleweights(seq_train_labels) 

In [285]:
# Basic RNN cell
class RNNCell(layers.Layer):
    def __init__(self, units, use_bias, activation, step, **kwargs):
        super(RNNCell, self).__init__(**kwargs)
        self.units = units
        self.step = step
        self.use_bias = use_bias
        self.activation = activations.get(activation)

        self.state_size = self.units
        self.output_size = self.units

        # Initializer for the kernel weights matrix, used for the linear transformation of the inputs
        self.kernel_initializer = initializers.get("glorot_uniform")

        # Initializer for the bias vector.
        self.bias_initializer = initializers.get("zeros")

        # Initializer for the recurrent_kernel (hidden) weights matrix, used for the linear
        # transformation of the recurrent state.
        self.recurrent_initializer = initializers.get("identity")

    def build(self, input_shape):
        self.kernel = self.add_weight(
            shape=(input_shape[-1], self.units),
            name="kernel",
            initializer=self.kernel_initializer,
        )
        self.recurrent_kernel = self.add_weight(
            shape=(self.units, self.units),
            name="recurrent_kernel",
            initializer=self.recurrent_initializer,
        )
        if self.use_bias:
            self.bias = self.add_weight(
                shape=(self.units,), name="bias", initializer=self.bias_initializer,
            )
        else:
            self.bias = None
        self.built = True

    def call(self, inputs, states, training=None):
        print("Input weights:", self.kernel.shape)
        print("Hidden state weights:", self.recurrent_kernel.shape)
        
        prev_output = states[0] if nest.is_sequence(states) else states
        
        print("Prev output:", prev_output.shape)
        h = K.dot(inputs, self.kernel)
        print("hidden state:", h.shape)
        if self.bias is not None:
            h = K.bias_add(h, self.bias)

        output = h + K.dot(prev_output, self.recurrent_kernel)
        if self.activation is not None:
            output = self.activation(output)

        new_state = [output] if nest.is_sequence(states) else output
        return output, new_state
    def get_initial_state(self, inputs=None, batch_size=None, dtype=None):
        return _generate_zero_filled_state_for_cell(self, inputs, batch_size, dtype)

In [286]:
# Build model with sequential api
def build_model():
    model = Sequential(name="FW-RNN")
    model.add(tf.keras.Input(shape=(seq_train_features.shape[1], seq_train_features.shape[2])))
    model.add(
        layers.RNN(
            RNNCell(units=64, 
                       use_bias=False, 
                       activation="relu", 
                       step=1,
                   ),
            return_sequences=True,
            name = 'FW-RNN'
        )
    )
    model.add(layers.LayerNormalization(name = 'LN'))
    model.add(layers.Dense(7, activation="softmax", name="Dense_Output"))
    model.compile(
        optimizer="adagrad",
        loss=CategoricalCrossentropy(label_smoothing=0.1),
        metrics=["accuracy", "AUC"],
    )
    return model

rnn = build_model()
rnn.summary()

Input weights: (4608, 64)
Hidden state weights: (64, 64)
Prev output: (None, 64)
hidden state: (None, 64)
Input weights: (4608, 64)
Hidden state weights: (64, 64)
Prev output: (None, 64)
hidden state: (None, 64)
Model: "FW-RNN"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
FW-RNN (RNN)                 (None, 120, 64)           299008    
_________________________________________________________________
LN (LayerNormalization)      (None, 120, 64)           128       
_________________________________________________________________
Dense_Output (Dense)         (None, 120, 7)            455       
Total params: 299,591
Trainable params: 299,591
Non-trainable params: 0
_________________________________________________________________


# Create FW-RNN Cell
-  Build custom FW_RNN cell and wrap it in RNN layer (https://www.tensorflow.org/api_docs/python/tf/keras/layers/RNN), like this: RNN(FW_RNN)
    -  "The cell abstraction, together with the generic keras.layers.RNN class, make it very easy to implement custom RNN architectures for your research."

Created by using this guide: https://www.tensorflow.org/guide/keras/custom_layers_and_models

In [89]:
from tensorflow.linalg import matmul

In [296]:
class FW_RNNCell(layers.Layer):
    def __init__(
        self, units, use_bias, decay_rate, learning_rate, activation, step, **kwargs
    ):
        super(FW_RNNCell, self).__init__(**kwargs)
        self.units = units
        self.step = step
        self.use_bias = use_bias
        self.activation = activations.get(activation)
        self.l = decay_rate
        self.e = learning_rate

        self.state_size = self.units

        # Initializer for the slow input-to-hidden weights matrix
        self.C_initializer = initializers.get("glorot_uniform")

        # Initializer for the slow hidden weights matrix
        self.W_h_initializer = initializers.get("identity")

        # Initializer for the fast weights matrix
        self.A_initializer = initializers.get("zeros")

        # Initializer for the bias vector.
        self.b_x_initializer = initializers.get("zeros")

    def build(self, input_shape):
        # Build is only called at the start, to initialize all the weights and biases

        # C = Slow input-to-hidden weights [shape (4608, 64)]
        self.C = self.add_weight(
            shape=(input_shape[-1], self.units),
            name="input-to-hidden weights",
            initializer=self.C_initializer,
        )
        # W_h The previous hidden state via the slow transition weights [shape (64, 64)]
        # they suggest to multiply it with 0.05, so gain = 0.05
        self.W_h = self.add_weight(
            shape=(self.units, self.units),
            name="slow hidden weights",
            initializer=self.W_h_initializer, #implement the gain later on
        )
        # A (fast weights) [shape (batch_size?, 64, 64)]
        self.A = self.add_weight(
            shape=(1, self.units),
            name="fast weights",
            initializer=self.A_initializer,
        )
        if self.use_bias:
            self.bias = self.add_weight(
                shape=(self.units,), name="bias", initializer=self.b_x_initializer,
            )
        else:
            self.bias = None
        self.built = True
        
        
    def call(self, inputs, states, training=None):
        prev_output = states[0] if nest.is_sequence(states) else states 
        
#         print("Input weights:", self.C.shape)
#         print("Hidden state weights:", self.W_h.shape)
#         print("Fast weights:", self.A.shape)
        
        # Next hidden state h(t+1) is computed in two steps:
        # Step 1 calculate preliminary vector: h_0(t+1) = f(W_h ⋅ h(t) + C ⋅ x(t))
        prelim = K.dot(prev_output, self.W_h) + K.dot(inputs, self.C) 
        h = prelim
        if self.activation is not None: 
            h = self.activation(h) # add activation function

        # Step 2: Initiate inner loop with preliminary vector, which runs for S steps
        # to progressively change the hidden state into h(t+1) = h_s(t+1)
        
        # h_s+1(t+1) f([W_h ⋅ h(t) + C ⋅ x(t)]) + A(t)h_s(t+1)
        for _ in range(self.step):
#             print("Prev output", prev_output.shape)
            h = prelim + K.dot(self.A, h) 
#             print('hidden state', h.shape)
            if self.activation is not None:
                h = self.activation(h)
                
        # Fast weights update rule: A(t) = λ*A(t-1) + η*h(t) ⋅ h(t)^T

        self.A = (self.l * self.A) + K.dot((self.e * prev_output),K.transpose(prev_output))    
        print("Fast weights after update shape:", self.A.shape)
        
        output = h
        new_state = [output] if nest.is_sequence(states) else output
        return output, new_state

    def get_initial_state(self, inputs=None, batch_size=None, dtype=None):
        return _generate_zero_filled_state_for_cell(self, inputs, batch_size, dtype)

- First prev_output is: **(None, 64)** <class 'tensorflow.python.framework.ops.Tensor'>
- The preliminary vector is: **(None, 64)** <class 'tensorflow.python.framework.ops.Tensor'>
- Fast weights shape: **(64, 64)** <class 'tensorflow.python.framework.ops.Tensor'>

# Build model

In [297]:
# Build model with sequential api
def build_model():
    model = Sequential(name="FW-RNN")
    model.add(tf.keras.Input(shape=(seq_train_features.shape[1], seq_train_features.shape[2])))
    model.add(
        layers.RNN(
            FW_RNNCell(units=64, 
                       use_bias=False, 
                       activation="relu", 
                       step=1,
                      decay_rate = 0.95,
                      learning_rate =0.5,
                   ),
            return_sequences=True,
            name = 'FW-RNN'
        )
    )
    model.add(layers.LayerNormalization(name = 'LN'))
    model.add(layers.Dense(7, activation="softmax", name="Dense_Output"))
    model.compile(
        optimizer="adagrad",
        loss=CategoricalCrossentropy(label_smoothing=0.1),
        metrics=["accuracy", "AUC"],
    )
    return model


In [298]:
fw_rnn = build_model()
fw_rnn.summary()

Fast weights after update shape: (None, 64)
Fast weights after update shape: (None, 64)
Model: "FW-RNN"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
FW-RNN (RNN)                 (None, 120, 64)           299008    
_________________________________________________________________
LN (LayerNormalization)      (None, 120, 64)           128       
_________________________________________________________________
Dense_Output (Dense)         (None, 120, 7)            455       
Total params: 299,591
Trainable params: 299,591
Non-trainable params: 0
_________________________________________________________________


# Train + Evaluate model

In [299]:
history_best = fw_rnn.fit(
    seq_train_features,
    seq_train_labels,
    batch_size=128,
    sample_weight=train_samples_weights,
    validation_data=(seq_val_features, seq_val_labels),
    epochs=50,
    verbose=2,
)

Epoch 1/50
Fast weights after update shape: (None, 64)
Fast weights after update shape: (None, 64)
Fast weights after update shape: (None, 64)
Fast weights after update shape: (None, 64)


TypeError: An op outside of the function building code is being passed
a "Graph" tensor. It is possible to have Graph tensors
leak out of the function building context by including a
tf.init_scope in your function building code.
For example, the following function will fail:
  @tf.function
  def has_init_scope():
    my_constant = tf.constant(1.)
    with tf.init_scope():
      added = my_constant * 2
The graph tensor has name: FW-RNN/FW-RNN/while/fw_rnn_cell_84/add_2:0

In [272]:
inp = tf.keras.Input(shape=(seq_train_features.shape[1], seq_train_features.shape[2]))
rnn = layers.RNN(
            FW_RNNCell(units=64, 
                       use_bias=False, 
                       activation="relu", 
                       step=1,
                      decay_rate = 0.95,
                      learning_rate =0.5,
                   ),
            return_sequences=True,
            name = 'FW-RNN'
        )(inp)
ln = layers.LayerNormalization()(rnn)
outp = layers.Dense(7, activation="softmax", name="Dense_Output")(ln)
model = tf.keras.Model(inputs=inp, outputs = outp)
model.compile(
        optimizer="adagrad",
        loss=CategoricalCrossentropy(label_smoothing=0.1),
        metrics=["accuracy", "AUC"],
)

prev_state.shape: (None, 64)
Fast weights after update shape: (None, 64)
prev_state.shape: (None, 64)
Fast weights after update shape: (None, 64)


In [273]:
model.summary()
history_best = model.fit(
    seq_train_features,
    seq_train_labels,
    batch_size=128,
    sample_weight=train_samples_weights,
    validation_data=(seq_val_features, seq_val_labels),
    epochs=50,
    verbose=2,
)

Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_87 (InputLayer)        [(None, 120, 4608)]       0         
_________________________________________________________________
FW-RNN (RNN)                 (None, 120, 64)           299008    
_________________________________________________________________
layer_normalization_8 (Layer (None, 120, 64)           128       
_________________________________________________________________
Dense_Output (Dense)         (None, 120, 7)            455       
Total params: 299,591
Trainable params: 299,591
Non-trainable params: 0
_________________________________________________________________
Epoch 1/50
prev_state.shape: (None, 64)
Fast weights after update shape: (None, 64)
prev_state.shape: (None, 64)
Fast weights after update shape: (None, 64)
prev_state.shape: (None, 64)
Fast weights after update shape: (None, 64)
prev_state.shape: (No

TypeError: An op outside of the function building code is being passed
a "Graph" tensor. It is possible to have Graph tensors
leak out of the function building context by including a
tf.init_scope in your function building code.
For example, the following function will fail:
  @tf.function
  def has_init_scope():
    my_constant = tf.constant(1.)
    with tf.init_scope():
      added = my_constant * 2
The graph tensor has name: model_5/FW-RNN/while/fw_rnn_cell_79/add_2:0