# 9 Adding a Custom Attention Layer to Recurrent Neural Network in Keras

In [63]:
import numpy as np
import pandas as pd
import tensorflow.keras.backend as K
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import layers
from tensorflow.keras.metrics import mean_squared_error
from tensorflow.keras.models import Sequential

# Seed for the NumPy RandomState that shuffles the samples in get_fib_xy.
seed = 13

## 9.2 The SimpleRNN Network

Our aim is to train an RNN on the Fibonacci numbers and get it to predict the next term given the few terms that immediately precede it.  
First, we need to construct the data for our contrived prediction problem...

In [64]:
def get_fib_seq(n, scale_data=True):
    """Build the first n Fibonacci terms that follow the initial pair (0, 1).

    Args:
        n: How many terms to generate.
        scale_data: If True, min-max-scale the terms to the (0, 1) range.

    Returns:
        A tuple (terms, scaler): a 1-D float array of length n and the fitted
        MinMaxScaler, or None in place of the scaler when scale_data is False.
    """
    terms = np.zeros(n)
    older, newer = 0.0, 1.0
    for idx in range(n):
        terms[idx] = older + newer
        # Slide the two-term window forward by one position.
        older, newer = newer, terms[idx]

    if not scale_data:
        return terms, None

    # Scale the terms as a single (n, 1) column, then flatten back to 1-D.
    fitted_scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_terms = fitted_scaler.fit_transform(terms.reshape(n, 1)).flatten()
    return scaled_terms, fitted_scaler

In [65]:
# Sanity check: show the first 10 unscaled terms (the returned scaler is discarded).
fib_seq, _ = get_fib_seq(10, False)
print(fib_seq)

[ 1.  2.  3.  5.  8. 13. 21. 34. 55. 89.]


In [66]:
def get_fib_xy(total_fib_numbers, time_steps, train_frac, scale_data=True):
    """Returns train/test data constructed from the Fibonacci series.

    Constructs training and test data (as X and y pairs), where each row of X
    consists of `time_steps` consecutive Fibonacci numbers and each y is the
    Fibonacci number immediately following the last one in the corresponding X.

    Args:
        total_fib_numbers: Total number of terms of the Fibonacci series to use
          to draw from to construct X/y pairs. (Excludes the initial 0, 1)
        time_steps: Number of terms in each X sample. Must be at least 1.
        train_frac: Fraction of data to designate as the training set. Must
          lie in the closed interval [0, 1].
        scale_data: Whether to min-max-scale the data to the (0, 1) range.

    Returns:
        A tuple (X_train, y_train, X_test, y_test, scaler) of shuffled
        training and testing data plus the fitted MinMaxScaler (or None when
        `scale_data` is False). The returned X arrays are of dimensions
        (num_samples, num_time_steps, num_features = 1).

    Raises:
        ValueError: If `time_steps` is less than 1 or `train_frac` lies
          outside [0, 1].
    """
    # Reject inputs that would otherwise fail with an obscure reshape/index
    # error (time_steps < 1) or silently produce a wrong split (a negative
    # train_frac turns the slice bound below into a from-the-end index).
    if time_steps < 1:
        raise ValueError(f"time_steps must be >= 1, got {time_steps}")
    if not 0.0 <= train_frac <= 1.0:
        raise ValueError(f"train_frac must be in [0, 1], got {train_frac}")

    data, scaler = get_fib_seq(total_fib_numbers, scale_data)

    # Every target is the term right after a window of `time_steps` terms.
    y = data[time_steps:]
    num_samples = len(y)

    # Column i holds the i-th term of every window. Stacking all columns in a
    # single call avoids re-copying the growing matrix once per time step.
    X = np.column_stack(
        [data[i : num_samples + i] for i in range(time_steps)]
    )

    # Shuffle the sample order with the notebook-level seed so that the
    # train/test split is reproducible.
    rand = np.random.RandomState(seed)
    indices = rand.permutation(num_samples)
    split = int(train_frac * num_samples)
    train_indices = indices[:split]
    test_indices = indices[split:]

    X_train = X[train_indices]
    y_train = y[train_indices]

    X_test = X[test_indices]
    y_test = y[test_indices]

    # Append a trailing feature axis of size 1:
    # (num_samples, time_steps) -> (num_samples, time_steps, 1).
    X_train = np.reshape(X_train, (len(X_train), time_steps, 1))
    X_test = np.reshape(X_test, (len(X_test), time_steps, 1))

    return X_train, y_train, X_test, y_test, scaler

In [67]:
# Small unscaled example: 12 terms, windows of 3 terms, 70% of the samples for training.
get_fib_xy(12, 3, 0.7, False)

(array([[[ 8.],
         [13.],
         [21.]],
 
        [[ 5.],
         [ 8.],
         [13.]],
 
        [[ 2.],
         [ 3.],
         [ 5.]],
 
        [[13.],
         [21.],
         [34.]],
 
        [[21.],
         [34.],
         [55.]],
 
        [[34.],
         [55.],
         [89.]]]),
 array([ 34.,  21.,   8.,  55.,  89., 144.]),
 array([[[ 55.],
         [ 89.],
         [144.]],
 
        [[  1.],
         [  2.],
         [  3.]],
 
        [[  3.],
         [  5.],
         [  8.]]]),
 array([233.,   5.,  13.]),
 None)

Great! Now let us build a model...

In [68]:
# Set up parameters
time_steps = 20  # number of consecutive terms in each input window
hidden_units = 2  # number of units in the SimpleRNN layer
epochs = 30  # number of training epochs passed to model_rnn.fit

# Create a traditional RNN network
def create_rnn(hidden_units, dense_units, input_shape, activations):
    """Assemble and compile a two-layer SimpleRNN + Dense model.

    Args:
        hidden_units: Number of units in the SimpleRNN layer.
        dense_units: Number of units in the Dense output layer.
        input_shape: Shape of a single input sample, passed to the SimpleRNN
          layer.
        activations: Indexable pair of activations; item 0 is used by the
          SimpleRNN layer and item 1 by the Dense layer.

    Returns:
        The Sequential model, compiled with MSE loss and the Adam optimizer.
    """
    recurrent_layer = layers.SimpleRNN(
        hidden_units,
        input_shape=input_shape,
        activation=activations[0],
    )
    network = Sequential()
    network.add(recurrent_layer)

    output_layer = layers.Dense(dense_units, activation=activations[1])
    network.add(output_layer)

    network.compile(loss="mse", optimizer="adam")
    return network

In [69]:
# Build the baseline model: a SimpleRNN layer feeding a single-unit Dense
# output, both using tanh activations, then print its layer summary.
model_rnn = create_rnn(
    hidden_units=hidden_units,
    dense_units=1,
    input_shape=(time_steps, 1),
    activations=["tanh", "tanh"],
)
model_rnn.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_5 (SimpleRNN)    (None, 2)                 8         
                                                                 
 dense_5 (Dense)             (None, 1)                 3         
                                                                 
Total params: 11
Trainable params: 11
Non-trainable params: 0
_________________________________________________________________


Now we are ready to create a larger dataset and train the model on it...

In [70]:
# Generate the dataset
# NOTE(review): `X_trian` is a misspelling of `X_train`; the fit and evaluate
# cells below use the same spelling, so rename all occurrences together.
X_trian, y_train, X_test, y_test, scaler = get_fib_xy(1200, time_steps, 0.7)

In [71]:
# Train with a batch size of 1 (one sample per step); verbose=2 logs one line per epoch.
model_rnn.fit(X_trian, y_train, epochs=epochs, batch_size=1, verbose=2)

Epoch 1/30
826/826 - 1s - loss: 2.4229e-04 - 1s/epoch - 1ms/step
Epoch 2/30
826/826 - 1s - loss: 1.8470e-04 - 650ms/epoch - 787us/step
Epoch 3/30
826/826 - 1s - loss: 1.3825e-04 - 651ms/epoch - 788us/step
Epoch 4/30
826/826 - 1s - loss: 1.0765e-04 - 656ms/epoch - 795us/step
Epoch 5/30
826/826 - 1s - loss: 8.9705e-05 - 635ms/epoch - 769us/step
Epoch 6/30
826/826 - 1s - loss: 7.1903e-05 - 628ms/epoch - 760us/step
Epoch 7/30
826/826 - 1s - loss: 6.4769e-05 - 624ms/epoch - 755us/step
Epoch 8/30
826/826 - 1s - loss: 6.4774e-05 - 648ms/epoch - 785us/step
Epoch 9/30
826/826 - 1s - loss: 5.9154e-05 - 643ms/epoch - 779us/step
Epoch 10/30
826/826 - 1s - loss: 5.9940e-05 - 626ms/epoch - 758us/step
Epoch 11/30
826/826 - 1s - loss: 5.6688e-05 - 624ms/epoch - 756us/step
Epoch 12/30
826/826 - 1s - loss: 5.6964e-05 - 624ms/epoch - 755us/step
Epoch 13/30
826/826 - 1s - loss: 5.7613e-05 - 616ms/epoch - 746us/step
Epoch 14/30
826/826 - 1s - loss: 5.7043e-05 - 619ms/epoch - 750us/step
Epoch 15/30
826/826 

<keras.callbacks.History at 0x16dd73dc0>

In [72]:
# Evaluate the model
# The model is compiled with loss="mse" and no extra metrics, so evaluate()
# reports the mean squared error, measured here on the min-max-scaled data.
train_mse = model_rnn.evaluate(X_trian, y_train)
test_mse = model_rnn.evaluate(X_test, y_test)
print("Train set MSE = ", train_mse)
print("Test set MSE = ", test_mse)

Train set MSE =  4.647787136491388e-05
Test set MSE =  2.5590516088414006e-05


## 9.3 Adding a Custom Attention Layer to the Network