# Sequence-to-Sequence (seq2seq) LSTM Encoder-Decoder Model Tutorial

https://machinelearningmastery.com/develop-encoder-decoder-model-sequence-sequence-prediction-keras/

### Imports

In [2]:
import os
import random

import scipy.io as sio
import numpy as np
import h5py
import tensorflow as tf
import matplotlib.pyplot as plt

from scipy.stats import pearsonr
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.layers import Input, Average, Concatenate, Bidirectional, TimeDistributed, Dense, Dropout, LSTM, Activation, Multiply, Conv1D, Conv3D
from keras.regularizers import L2
from keras.utils import to_categorical
from keras_tuner import RandomSearch, BayesianOptimization, Objective
import keras_tuner as kt
import tensorflow_probability as tfp
from sklearn.metrics import mean_squared_error, accuracy_score, balanced_accuracy_score
from sklearn.utils import shuffle
from sklearn.model_selection import KFold, LeaveOneOut, ShuffleSplit, StratifiedShuffleSplit, StratifiedKFold

### Model Definition

In [3]:
def encoder_decoder_model(n_input, n_output, n_units):
    """Build the three Keras models that make up an LSTM encoder-decoder.

    The training model and both inference models are wired from the same
    layer objects (encoder LSTM, decoder LSTM and output Dense layer), so the
    inference models reuse whatever weights the training model has.

    Args:
        n_input (int): Number of features per time step of the source sequence.
        n_output (int): Number of features per time step of the target sequence.
        n_units (int): Number of units in the encoder and decoder LSTM layers.

    Returns:
        (Model, Model, Model): The training model, the inference encoder and
            the inference decoder, in that order.
    """
    # Training graph, encoder side: only the final hidden/cell states are kept.
    enc_in = Input(shape=(None, n_input))
    enc_lstm = LSTM(n_units, return_state=True)
    _, enc_h, enc_c = enc_lstm(enc_in)
    enc_states = [enc_h, enc_c]

    # Training graph, decoder side: starts from the encoder states and emits
    # a softmax distribution over the output classes at every time step.
    dec_in = Input(shape=(None, n_output))
    dec_lstm = LSTM(n_units, return_sequences=True, return_state=True)
    dec_seq, _, _ = dec_lstm(dec_in, initial_state=enc_states)
    dec_dense = Dense(n_output, activation='softmax')
    train_model = Model([enc_in, dec_in], dec_dense(dec_seq))

    # Inference encoder: maps a source sequence to its [h, c] state pair.
    infer_encoder = Model(enc_in, enc_states)

    # Inference decoder: takes the decoder input plus explicit [h, c] states
    # and returns the prediction together with the updated states.
    state_h_in = Input(shape=(n_units,))
    state_c_in = Input(shape=(n_units,))
    state_inputs = [state_h_in, state_c_in]
    step_seq, step_h, step_c = dec_lstm(dec_in, initial_state=state_inputs)
    step_probs = dec_dense(step_seq)
    infer_decoder = Model([dec_in, *state_inputs], [step_probs, step_h, step_c])

    return train_model, infer_encoder, infer_decoder

In [11]:
def predict_sequences(infenc, infdec, source, n_steps, cardinality):
    """Predict the target sequence for a given source sequence using inference encoder and decoder models.

    Args:
        infenc (keras.models Model): Encoder model used when making a prediction for a new source sequence.
        infdec (keras.models Model): Decoder model used when making a prediction for a new source sequence.
        source (array-like): Source sequence.
        n_steps (int): Length of target sequence.
        cardinality (int): The cardinality of each output sequence element, i.e. number of classes to predict sequence elements from.

    Returns:
        numpy.ndarray: One row per decoding step, each row being the decoder
            output ``yhat[0, 0, :]`` for that step. Empty when ``n_steps`` is 0.
    """
    # encode source through encoder model to get the initial decoder states
    state = infenc.predict(source)
    # initial decoder sequence input: a single all-zero time step
    target_dist = np.zeros((1, 1, cardinality))
    output = []
    for _ in range(n_steps):
        yhat, h, c = infdec.predict([target_dist, *state])
        # save prediction distribution
        output.append(yhat[0, 0, :])
        # update states
        state = [h, c]
        # feed this step's prediction back in as the next decoder input
        target_dist = yhat
    # Return only after all n_steps have been decoded (the return used to sit
    # inside the loop, which cut decoding short after the first step).
    return np.array(output)

### Seq2seq Data Definition

In [19]:
def generate_sequence(length, n_unique):
    """Draw a sequence of random integers that never contains the value 0.

    Args:
        length (int): Number of elements to draw.
        n_unique (int): Cardinality of the sequence elements; every drawn
            value is strictly below it.

    Returns:
        list: ``length`` random integers, each between 1 and ``n_unique - 1``
            inclusive.
    """
    lowest, highest = 1, n_unique - 1
    sequence = []
    for _ in range(length):
        sequence.append(random.randint(lowest, highest))
    return sequence

def reverse_sequence_first_n(source, n_out):
    """Return the leading ``n_out`` elements of a sequence in reverse order.

    Args:
        source (array-like): Sequence to take the leading elements from.
        n_out (int): How many leading elements to take and reverse.

    Returns:
        array-like: The first ``n_out`` elements of ``source``, reversed. The
            rest of ``source`` is not included.
    """
    # Slice first, then reverse the slice, so the result has the same
    # sequence type as the input.
    head = source[:n_out]
    reversed_head = head[::-1]
    return reversed_head

def pad_sequence(source):
    """Shift a sequence right by one position, putting a 0 in front.

    The last element of ``source`` is dropped and a 0 is placed at the start.

    Args:
        source (array-like): Sequence to shift and pad.

    Returns:
        array-like: ``[0]`` followed by every element of ``source`` except
            the last one.
    """
    leading_pad = [0]
    all_but_last = source[:-1]
    return leading_pad + all_but_last

def get_dataset(n_in, n_out, cardinality, n_samples):
    """Generates a dataset of source, target, and shifted target sequences.

    Each sample is a random source sequence, a target made of the first
    ``n_out`` source elements in reverse order, and that target shifted right
    by one with a leading 0. All three are one-hot encoded with
    ``cardinality`` classes.

    Args:
        n_in (int): Length of source sequences.
        n_out (int): Length of target sequences.
        cardinality (int): Cardinality of source and target sequences.
        n_samples (int): Number of samples to generate.

    Returns:
        (numpy.ndarray, numpy.ndarray, numpy.ndarray): Tuple of one-hot
            encoded source, shifted (padded) target, and target sequences,
            each with ``n_samples`` entries along the first axis.
    """
    X1, X2, y = [], [], []
    for _ in range(n_samples):
        # create source sequence
        source = generate_sequence(n_in, cardinality)

        # create target sequence from source, plus its shifted decoder input
        target = reverse_sequence_first_n(source, n_out)
        target_pad = pad_sequence(target)

        # one-hot encode and save data for current sample
        X1.append(to_categorical(source, num_classes=cardinality))
        X2.append(to_categorical(target_pad, num_classes=cardinality))
        y.append(to_categorical(target, num_classes=cardinality))

    # Return once every sample has been generated (the return used to sit
    # inside the loop, so only the first sample was ever produced).
    return np.array(X1), np.array(X2), np.array(y)
    
def one_hot_decode(encoded_seq):
    """Turn a sequence of one-hot (or probability) vectors into class indices.

    Args:
        encoded_seq (array-like): Sequence whose elements are per-class
            vectors, one vector per sequence position.

    Returns:
        list: For each position, the index of the largest entry of its vector.
    """
    decoded = []
    for class_vector in encoded_seq:
        decoded.append(np.argmax(class_vector))
    return decoded

### Test Data Generation

In [21]:
# Demo cell: build a one-sample dataset and print its shapes and decoded contents.
# Class count: 50 usable values plus the value 0, which pad_sequence places at
# the start of the shifted target and generate_sequence never draws.
n_feats = 50 + 1 # 50 features + 1 value reserved for padding (input to decoder model for prediction of first element)
n_steps_in = 6   # length of each source sequence
n_steps_out = 3  # length of each target sequence (first 3 source elements, reversed)

# Generate a single sample (n_samples=1) of one-hot encoded source, shifted target and target.
X1, X2, y = get_dataset(n_steps_in, n_steps_out, n_feats, 1)
print(f'Source Sequence Shape: {X1.shape}')
print(f'Padded Target Sequence Shape: {X2.shape}')
print(f'Target Sequence Shape: {y.shape}\n')

# Decode the one-hot rows of the first sample back to integer class indices for display.
print(f'Source Sequence Sample: {one_hot_decode(X1[0])}')
print(f'Padded Target Sequence Sample: {one_hot_decode(X2[0])}')
print(f'Target Sequence Sample: {one_hot_decode(y[0])}')

Source Sequence Shape: (1, 6, 51)
Padded Target Sequence Shape: (1, 3, 51)
Target Sequence Shape: (1, 3, 51)

Source Sequence Sample: [2, 12, 29, 1, 47, 23]
Padded Target Sequence Sample: [0, 29, 12]
Target Sequence Sample: [29, 12, 2]
