## Seq2Seq model using tf.keras and Encoder/Decoder architecture

In [1]:
from random import randint
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Flatten, InputLayer, Input, TimeDistributed, RepeatVector
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.utils import plot_model
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Lambda

tf.keras.backend.set_floatx('float64')

#### Functions to generate a Seq2Seq dataset and to one-hot encode and decode the input and output sequences

In [2]:
def generate_sequence(length, n_unique):
    """Return a list of `length` random integers drawn from [1, n_unique - 1].

    Both bounds are inclusive, so the value 0 is never produced.
    """
    values = []
    for _ in range(length):
        values.append(randint(1, n_unique - 1))
    return values

In [3]:
def one_hot_encode(sequence, n_unique):
    """One-hot encode a sequence of integer symbols.

    Each value in `sequence` becomes a row of `n_unique` zeros with a single 1
    at the position given by the value. The rows are returned stacked in a
    NumPy array, in the same order as the input.
    """
    rows = []
    for symbol in sequence:
        row = [0] * n_unique
        row[symbol] = 1
        rows.append(row)
    return np.array(rows)

In [4]:
def one_hot_decode(encoded_seq):
    """Return, for every vector in `encoded_seq`, the index of its largest entry."""
    decoded = []
    for vector in encoded_seq:
        decoded.append(np.argmax(vector))
    return decoded

In [5]:
def get_reversed_pairs(time_steps, vocabulary_size, verbose=False):
    """Create one (input, target) sample in which the target is the input reversed.

    Args:
        time_steps: Number of symbols in the generated sequence.
        vocabulary_size: Width of each one-hot vector; symbols are drawn by
            generate_sequence(time_steps, vocabulary_size).
        verbose: When True, print the sample in decoded and one-hot form.

    Returns:
        Tuple (X, y) of one-hot encoded arrays, each reshaped to
        (1, time_steps, vocabulary_size).
    """
    # Random source sequence and its mirror image as the target.
    sequence_in = generate_sequence(time_steps, vocabulary_size)
    sequence_out = list(reversed(sequence_in))

    # One-hot encode, then prepend a batch axis of size 1 (2D -> 3D).
    X = one_hot_encode(sequence_in, vocabulary_size)
    X = X.reshape(1, X.shape[0], X.shape[1])
    y = one_hot_encode(sequence_out, vocabulary_size)
    y = y.reshape(1, y.shape[0], y.shape[1])

    if not verbose:
        return X, y

    print("Notes")
    print(f"1. For each input sequence,(X), we select {time_steps} random numbers between 1 and {vocabulary_size - 1}")
    print("2. 0 is reserved as the START symbol", end="\n\n")

    print("A sample of input: X")
    print(one_hot_decode(X[0]), end="\n\n")

    print("A sample of output(reverse of X): y")
    print(one_hot_decode(y[0]), end="\n\n")

    print("X and y in One-hot encoded format")
    print("X:")
    print(X[0], end="\n\n")
    print("y:")
    print(y[0], end="\n\n")

    return X, y

In [6]:
def _generate_split(n_samples, time_steps, vocabulary_size):
    """Generate `n_samples` reversed-sequence pairs stacked into two 3D arrays."""
    if n_samples <= 0:
        # No samples requested: return correctly shaped empty arrays instead of
        # failing on the indexing below. Integer dtype matches the one-hot
        # arrays that one_hot_encode builds from Python ints.
        empty = np.zeros((0, time_steps, vocabulary_size), dtype=int)
        return empty, empty.copy()

    pairs = [get_reversed_pairs(time_steps, vocabulary_size) for _ in range(n_samples)]
    # Stacked shape: (n_samples, 2, 1, time_steps, vocabulary_size).
    stacked = np.array(pairs)
    # Drop ONLY the per-sample batch axis. A bare squeeze() would also remove
    # the sample or time axis whenever n_samples == 1 or time_steps == 1,
    # which silently produces wrongly shaped X / y.
    stacked = stacked.squeeze(axis=2)
    return stacked[:, 0], stacked[:, 1]


def create_dataset(train_size,
                   test_size,
                   time_steps,
                   vocabulary_size,
                   verbose=False):
    """Build training and test sets for the sequence-reversal task.

    Args:
        train_size: Number of training samples to generate.
        test_size: Number of test samples to generate.
        time_steps: Length of every sequence.
        vocabulary_size: Width of the one-hot vectors.
        verbose: When True, print the shapes of the four returned arrays.

    Returns:
        Tuple (X_train, y_train, X_test, y_test); every array has shape
        (n_samples, time_steps, vocabulary_size).
    """
    # Training pairs are generated before test pairs, as in the original
    # implementation, so the order of random draws is unchanged.
    X_train, y_train = _generate_split(train_size, time_steps, vocabulary_size)
    X_test, y_test = _generate_split(test_size, time_steps, vocabulary_size)

    if verbose:
        print("Train data")
        print(X_train.shape)
        print(y_train.shape)

        print("Test data")
        print(X_test.shape)
        print(y_test.shape)

    return X_train, y_train, X_test, y_test


#### Function to train and test a given model (early stopping monitors `val_loss`)

In [7]:
# Checkpoint callback that saves the model to "encoder_decoder_model.keras"
# only when val_loss improves (save_best_only with mode="min").
# NOTE: train_test() creates its own ModelCheckpoint with the same settings, so
# this notebook-level instance is not the one passed to fit() there.
ckp = ModelCheckpoint(
    "encoder_decoder_model.keras",
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    verbose=1,
)

In [8]:
#@title Function to Train & Test  given model (Early Stopping monitor 'val_loss')
def _plot_history(history, metric, model_name):
    """Plot the training and validation curves of one metric from a fit() history."""
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title(model_name + ' ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()


def train_test(model, X_train, y_train, X_test, y_test, epochs=500, batch_size=32, patience=5, verbose=0):
    """Train `model`, report its accuracy, plot its history and spot-check predictions.

    Training uses 10% of the training data as a validation split, stops early
    when val_loss has not improved for `patience` epochs, and saves the best
    model (lowest val_loss) to "encoder_decoder_model.keras".

    Args:
        model: Compiled model exposing fit / evaluate / predict and a `name`.
            evaluate() must return exactly (loss, accuracy), and the fit
            history must contain 'accuracy' and 'val_accuracy'.
        X_train, y_train: Training inputs and one-hot targets.
        X_test, y_test: Test inputs and one-hot targets.
        epochs: Maximum number of training epochs.
        batch_size: Batch size used for fit, evaluate and predict.
        patience: Early-stopping patience, in epochs.
        verbose: Verbosity passed to model.fit.

    Returns:
        None. Results are printed and plotted.
    """
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=patience)
    ckp = ModelCheckpoint("encoder_decoder_model.keras", monitor="val_loss", verbose=1, save_best_only=True, mode="min")

    # train model
    print('training for ',epochs,' epochs begins with EarlyStopping(monitor= val_loss, patience=',patience,')....')
    history = model.fit(X_train, y_train, validation_split=0.1, epochs=epochs,
                        batch_size=batch_size, verbose=verbose, callbacks=[es, ckp])
    print(epochs,' epoch training finished...')

    # evaluate the model on both splits
    _, train_acc = model.evaluate(X_train, y_train, batch_size=batch_size, verbose=0)
    _, test_acc = model.evaluate(X_test, y_test, batch_size=batch_size, verbose=0)
    print('\nPREDICTION ACCURACY (%):')
    print('Train: %.3f, Test: %.3f' % (train_acc*100, test_acc*100))

    # summarize history for accuracy, then for loss
    _plot_history(history, 'accuracy', model.name)
    _plot_history(history, 'loss', model.name)

    # spot check some examples; never ask for more samples than the test set holds
    sampleNo = min(10, len(X_test))
    if sampleNo == 0:
        return

    space = 3*len(one_hot_decode(y_test[0]))
    print(sampleNo, 'examples from test data...')
    print('Input',' '*(space-4) ,'Expected',' '*(space-7) ,
          'Predicted',' '*(space-5) ,'T/F')

    # Predict with the model that was passed in. The original code referenced an
    # unrelated global name here and failed with NameError on a fresh kernel.
    predicted = model.predict(X_test[:sampleNo], batch_size=batch_size)

    correct = 0
    for sample in range(sampleNo):
        source = one_hot_decode(X_test[sample])
        expected = one_hot_decode(y_test[sample])
        got = one_hot_decode(predicted[sample])
        is_match = expected == got
        if is_match:
            correct += 1
        print(source, ' ', expected, ' ', got, ' ', is_match)
    print('Accuracy: ', correct/sampleNo)
	


#### Configuring the problem

In [9]:
# Problem size: sequence length and width of each one-hot vector.
n_timesteps = 4
n_features = 10

# Draw a single sample pair and print it in decoded and one-hot form.
X, y = get_reversed_pairs(time_steps=n_timesteps,
                          vocabulary_size=n_features,
                          verbose=True)

Notes
1. For each input sequence,(X), we select 4 random numbers between 1 and 9
2. 0 is reserved as the START symbol

A sample of input: X
[9, 9, 7, 4]

A sample of output(reverse of X): y
[4, 7, 9, 9]

X and y in One-hot encoded format
X:
[[0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 1 0 0 0 0 0]]

y:
[[0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0 1]]



In [10]:
# Generate the training and test splits for the configured problem size.
train_size = 2000  #@param {type:"integer"}
test_size = 200  #@param {type:"integer"}

X_train, y_train, X_test, y_test = create_dataset(
    train_size=train_size,
    test_size=test_size,
    time_steps=n_timesteps,
    vocabulary_size=n_features,
    verbose=True,
)

Train data
(2000, 4, 10)
(2000, 4, 10)
Test data
(200, 4, 10)
(200, 4, 10)


In [11]:
# Number of units in every LSTM layer built below (encoder and decoder alike).
noOfUnits = 16

#### Defining Encoder LSTM

In [12]:
# Encoder input: sequences of n_timesteps steps with n_features values per step.
encoder_inputs = Input(shape=(n_timesteps, n_features),
                       name="encoder_inputs")
# return_state=True makes the layer return its final hidden and cell states
# in addition to its regular output.
encoder_lstm = LSTM(noOfUnits, return_state=True,
                    name="encoder_lstm")

encoder_outputs, state_h, state_c = encoder_lstm(encoder_inputs)
# The encoder's final states are kept together; the decoder cell below passes
# them as initial_state.
states = [state_h, state_c]

In [13]:
# Three shapes are reported: the layer output (the last hidden state, since
# return_sequences is not set), the last hidden state again as state_h, and
# the last cell state as state_c.
encoder_lstm.output_shape

[(None, 16), (None, 16), (None, 16)]

Context vector = [state_h, state_c]

In [14]:
# Define a standalone encoder model mapping the input sequence to
# [state_h, state_c]. Nothing is compiled here: the model is only called below.
model_encoder = Model(inputs = encoder_inputs, outputs = states)
# Run the encoder on the sample X printed earlier to obtain its context vector.
context_vector = model_encoder(X)

# Compare the input shape with the shapes of the two returned states.
print(X.shape)
print(f"noOfUnits: {noOfUnits}")
print(f"Last Hidden State: {context_vector[0].numpy().shape}")
print(f"Last Cell State: {context_vector[1].numpy().shape}")

(1, 4, 10)
noOfUnits: 16
Last Hidden State: (1, 16)
Last Cell State: (1, 16)


#### Defining Decoder LSTM

In [15]:
# Decoder input: a single time step of n_features values.
decoder_inputs = Input(shape=(1, n_features),
                       name = "decoder_inputs")

# return_sequences=True keeps the time axis on the output; return_state=True
# additionally returns the final hidden and cell states.
decoder_lstm = LSTM(noOfUnits,
                    return_sequences=True,
                    return_state=True,
                    name="decoder_lstm")

# Softmax projection onto the n_features symbols. It is only instantiated in
# this cell, not yet applied to `outputs`.
decoder_dense = Dense(n_features, activation='softmax')

# The decoder starts from the encoder's final states (`states`).
# NOTE: this rebinds the notebook-level state_h / state_c first assigned in
# the encoder cell; `states` still refers to the encoder's tensors.
outputs, state_h, state_c = decoder_lstm(decoder_inputs,
                                         initial_state=states)

In [16]:
# Shapes of the decoder LSTM's three outputs: the output sequence (time axis
# kept because return_sequences=True), then the hidden state and cell state.
decoder_lstm.output_shape

[(None, 1, 16), (None, 16), (None, 16)]

In [17]:
#title Sample X and y
# Show the sample pair both as decoded integer sequences and as one-hot matrices.

print('\nIn raw format:')
print("X:", one_hot_decode(X[0]))
print("y:", one_hot_decode(y[0]))
print('\nIn one_hot_encoded format:')
print("X: ")
print(X[0])
print()
print("y: ")
print(y[0])


In raw format:
X: [9, 9, 7, 4]
y: [4, 7, 9, 9]

In one_hot_encoded format:
X: 
[[0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 1 0 0 0 0 0]]

y: 
[[0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0 1]]


In [18]:
def create_hard_coded_decoder_input_model(batch_size):
    """Build and compile an encoder-decoder model with a constant START input.

    The encoder LSTM reads the input sequence and its final hidden and cell
    states initialise the decoder. The decoder LSTM is then applied
    n_timesteps times, one token per step: the first step is fed a constant
    one-hot vector for symbol 0 (the START symbol), and each later step is fed
    the previous step's softmax output together with the updated states.

    Relies on the notebook-level n_timesteps, n_features and noOfUnits.

    Args:
        batch_size: Number of rows in the constant START-token array that is
            built into the model.
            NOTE(review): this presumably ties the model to batches of exactly
            this size - confirm before using other batch sizes.

    Returns:
        Model named 'encoder_decoder_model', compiled with the rmsprop
        optimizer, categorical cross-entropy loss and the accuracy metric.
    """
    # Encoder: only the final states are needed, the output itself is ignored.
    encoder_inputs = Input(shape=(n_timesteps, n_features), name='encoder_inputs')
    encoder_lstm = LSTM(noOfUnits, return_state=True, name='encoder_lstm')
    _, state_h, state_c = encoder_lstm(encoder_inputs)

    # Decoder layers, shared by every decoding step.
    decoder_lstm = LSTM(noOfUnits, return_sequences=True,
                        return_state=True, name='decoder_lstm')
    decoder_dense = Dense(n_features, activation='softmax', name='decoder_dense')

    # Constant first decoder input: one-hot START symbol, [1 0 0 ... 0], per row.
    start_token = np.zeros((batch_size, 1, n_features))
    start_token[:, 0, 0] = 1

    step_input = start_token
    decoder_state = [state_h, state_c]  # initial context: the encoder's states
    step_predictions = []
    for _ in range(n_timesteps):
        # Decode a single time step from the current input and states.
        lstm_out, state_h, state_c = decoder_lstm(step_input,
                                                  initial_state=decoder_state)
        # The prediction is both stored and fed back in as the next input.
        step_input = decoder_dense(lstm_out)
        step_predictions.append(step_input)
        decoder_state = [state_h, state_c]

    # Join the per-step predictions along the time axis:
    # [batch_size, timesteps, features].
    decoder_outputs = Lambda(lambda steps: K.concatenate(steps, axis=1))(step_predictions)

    model = Model(inputs=encoder_inputs, outputs=decoder_outputs,
                  name='encoder_decoder_model')
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [19]:
# Build the model; the same batch size is reused when training below.
batch_size = 10
model = create_hard_coded_decoder_input_model(batch_size=batch_size)
model.summary()
#plot_model(model, show_shapes=True, show_layer_names=True, to_file='basic_encoder_decoder_model.png')

Model: "encoder_decoder_model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_inputs (InputLayer)     [(None, 4, 10)]      0                                            
__________________________________________________________________________________________________
encoder_lstm (LSTM)             [(None, 16), (None,  1728        encoder_inputs[0][0]             
__________________________________________________________________________________________________
decoder_lstm (LSTM)             [(10, 1, 16), (None, 1728        encoder_lstm[0][1]               
                                                                 encoder_lstm[0][2]               
                                                                 decoder_dense[0][0]              
                                                                 decoder_lstm[

In [20]:
# Train with early stopping, then report accuracy, plots and sample predictions.
train_test(
    model=model,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    epochs=500,
    batch_size=batch_size,
    verbose=1,
)

training for  500  epochs begins with EarlyStopping(monitor= val_loss, patience= 5 )....
Epoch 1/500

Epoch 00001: val_loss improved from inf to 2.00332, saving model to encoder_decoder_model.keras
Epoch 2/500

Epoch 00002: val_loss improved from 2.00332 to 1.67975, saving model to encoder_decoder_model.keras
Epoch 3/500

Epoch 00003: val_loss improved from 1.67975 to 1.55797, saving model to encoder_decoder_model.keras
Epoch 4/500

Epoch 00004: val_loss improved from 1.55797 to 1.46813, saving model to encoder_decoder_model.keras
Epoch 5/500

Epoch 00005: val_loss improved from 1.46813 to 1.39701, saving model to encoder_decoder_model.keras
Epoch 6/500

Epoch 00006: val_loss improved from 1.39701 to 1.33622, saving model to encoder_decoder_model.keras
Epoch 7/500

Epoch 00007: val_loss improved from 1.33622 to 1.28579, saving model to encoder_decoder_model.keras
Epoch 8/500

Epoch 00008: val_loss improved from 1.28579 to 1.23856, saving model to encoder_decoder_model.keras
Epoch 9/500

: 