In [1]:
import numpy as np

# Keras
from keras.utils import to_categorical
from keras.models import Model
from keras.layers import Input
from keras.layers import LSTM
from keras.layers import Dense

%matplotlib inline

# Encoder-Decoder LSTM for Sorting Numbers

The first step is to configure the problem, we will use with 6 numbers to order.

In [2]:
n_features = 50
n_steps_in = 6
n_steps_out = 6

Function to define an encoder-decoder recurrent neural networks, two recurrent neural networks, one to encode the source sequence (encoder), and a second to decode the encoded source sequence into the target sequence (decoder).

For training the model takes both the input and a shifted version of the target sequence as input and predicts the whole target sequence, the inference encoder model is used to encode the input sequence once which returns states that are used to initialize the inference decoder model. From that point, the inference decoder model is used to generate predictions step by step.

In [3]:
# returns train, inference_encoder and inference_decoder models
def models(n_input, n_output, n_units):
    """
    Create models for encoder-decoder neural network

    Parameters
    ----------
    n_input : int
        Length of input sequence, e.g number of integers to sort
    n_output : int
        Length of output sequence, e.g number of integers sorted
    n_units : int
        Cells to create in the encoder and decoder models

    Returns
    -------
    model, encoder_model, decoder_model
    """
    # training encoder
    encoder_inputs = Input(shape=(None, n_input))
    encoder = LSTM(n_units, return_state=True)
    encoder_outputs, state_h, state_c = encoder(encoder_inputs)
    encoder_states = [state_h, state_c]

    
    # training decoder
    decoder_inputs = Input(shape=(None, n_output))
    decoder_lstm = LSTM(n_units, return_sequences=True, return_state=True)
    decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
    decoder_dense = Dense(n_output, activation='softmax')
    decoder_outputs = decoder_dense(decoder_outputs)
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

    # inference encoder
    encoder_model = Model(encoder_inputs, encoder_states)

    # inference decoder
    decoder_state_input_h = Input(shape=(n_units,))
    decoder_state_input_c = Input(shape=(n_units,))
    decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
    decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)
    decoder_states = [state_h, state_c]
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)

    return model, encoder_model, decoder_model

Defining the models and compiling the training model

In [4]:
train, encoder, decoder = models(n_features, n_features, 128)
train.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

Generate a training dataset and traing the model

In [5]:
def generate_sequence(length, max_number):
    """
    Generate a sequence of random numbers

    Parameters
    ----------
    length : int
        Length of the sequence
    max_number : int
        Maximum number value

    Returns
    -------
    sequence
    """
    sequence = np.random.randint(max_number, size=length)
    return sequence

In [6]:
def one_hot_decode(encoded_seq):
    """
    Decode a one hot encoded string

    Parameters
    ----------
    encoded_seq : int
        Length of the sequence

    Returns
    -------
    sequence decoded
    """
    return [np.argmax(vector) for vector in encoded_seq]

In [7]:
def generate_dataset(n_input, max_number, n_samples):
    """
    Generate a number of sequences to use to train a model

    Parameters
    ----------
    n_input : int
        Length of input sequence, e.g number of integers to sort
    max_number : int
        Maximum number value
    n_samples : int
        Number of samples to generate

    Returns
    -------
    X1, X2, y
        Train and target datasets
    """
    X1, X2, y = list(), list(), list()
    for _ in range(n_samples):
        # generate source sequence
        source = generate_sequence(n_input, max_number)
        # define target sequence
        target = sorted(source)
        # create padded input target sequence
        target_in = [0] + target[:-1]
        # encode
        src_encoded = to_categorical(source, num_classes=max_number)
        tar_encoded = to_categorical(target, num_classes=max_number)
        tar2_encoded = to_categorical(target_in, num_classes=max_number)
        # store
        X1.append(src_encoded)
        X2.append(tar2_encoded)
        y.append(tar_encoded)

    return np.array(X1), np.array(X2), np.array(y)

Example of dataset

In [8]:
n_samples = 1
X1, X2, y = generate_dataset(n_steps_in, n_features, n_samples)
print(X1.shape, X2.shape, y.shape)
print('X1=%s, X2=%s, target=%s' % (one_hot_decode(X1[0]), one_hot_decode(X2[0]), one_hot_decode(y[0])))

(1, 6, 50) (1, 6, 50) (1, 6, 50)
X1=[12, 33, 7, 5, 18, 46], X2=[0, 5, 7, 12, 18, 33], target=[5, 7, 12, 18, 33, 46]


Generate a training dataset of 50000 examples and train the model

In [9]:
n_samples = 50000
X1, X2, y = generate_dataset(n_steps_in, n_features, n_samples)
print(X1.shape, X2.shape, y.shape)

(50000, 6, 50) (50000, 6, 50) (50000, 6, 50)


In [10]:
train.fit([X1, X2], y, epochs=1)



<tensorflow.python.keras.callbacks.History at 0x136e4f760>

After the model is trained, we can evaluate it, we define a function to generate a target sequence given a sequence

In [11]:
def predict_sequence(encoder, decoder, source, n_steps, max_number):
    """
    Predict target sequence given 

    Parameters
    ----------
    encoder : model
        Encoder when making a prediction for a sequence
    decoder : model
        Decoder when making a prediction for a sequence
    source : array
        Encoded sequence
    n_steps: int
        Number of numbers to predict
    max_number : int
        Maximum number value for reshape

    Returns
    -------
    list with the target sequence predicted
    """
    # encode
    state = encoder.predict(source)
    # start of sequence input
    target_seq = np.array([0.0 for _ in range(max_number)]).reshape(1, 1, max_number)
    
    # collect predictions
    output = list()
    for t in range(n_steps):
        # predict next char
        yhat, h, c = decoder.predict([target_seq] + state)
        # store prediction
        output.append(yhat[0,0,:])
        # update state
        state = [h, c]
        # update target sequence
        target_seq = yhat
    return np.array(output)

We evaluate our lstm model by making predictions for 100 sequences and counting the number of sequences we predicted correctly, we can see that with 50000 number of samples we got an accuracy over 90%!

In [12]:
total, correct = 100, 0
for _ in range(total):
    X1, X2, y = generate_dataset(n_steps_in, n_features, 1)
    target = predict_sequence(encoder, decoder, X1, n_steps_out, n_features)
    if np.array_equal(one_hot_decode(y[0]), one_hot_decode(target)):
        correct += 1
print('Accuracy: %.2f%%' % (float(correct)/float(total)*100.0))

Accuracy: 96.00%


Finally, print the input, target and predictions of 10 examples to see how the model is working!

In [13]:
for _ in range(10):
	X1, X2, y = generate_dataset(n_steps_in, n_features, 1)
	target = predict_sequence(encoder, decoder, X1, n_steps_out, n_features)
	print('Input=%s target=%s, prediction=%s' % (one_hot_decode(X1[0]), one_hot_decode(y[0]), one_hot_decode(target)))

Input=[0, 36, 33, 25, 21, 26] target=[0, 21, 25, 26, 33, 36], prediction=[0, 21, 25, 26, 33, 36]
Input=[5, 23, 41, 44, 11, 38] target=[5, 11, 23, 38, 41, 44], prediction=[5, 11, 23, 38, 41, 44]
Input=[0, 19, 34, 16, 22, 15] target=[0, 15, 16, 19, 22, 34], prediction=[0, 15, 16, 19, 22, 34]
Input=[42, 21, 29, 30, 44, 11] target=[11, 21, 29, 30, 42, 44], prediction=[11, 21, 29, 30, 42, 44]
Input=[45, 22, 17, 7, 29, 26] target=[7, 17, 22, 26, 29, 45], prediction=[7, 17, 22, 26, 29, 45]
Input=[37, 12, 6, 7, 27, 10] target=[6, 7, 10, 12, 27, 37], prediction=[6, 7, 10, 12, 27, 37]
Input=[8, 49, 19, 25, 35, 32] target=[8, 19, 25, 32, 35, 49], prediction=[8, 19, 25, 32, 35, 49]
Input=[36, 38, 2, 28, 38, 23] target=[2, 23, 28, 36, 38, 38], prediction=[2, 23, 28, 38, 38, 38]
Input=[8, 22, 35, 3, 6, 21] target=[3, 6, 8, 21, 22, 35], prediction=[3, 6, 8, 21, 22, 35]
Input=[15, 12, 34, 19, 5, 25] target=[5, 12, 15, 19, 25, 34], prediction=[5, 12, 15, 19, 25, 34]
