In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.layers import RepeatVector

from random import randint


def check_gpu(cuda_only = True):
    """Print GPU availability plus the TensorFlow and Keras versions.

    Uses ``tf.config.list_physical_devices('GPU')`` instead of the deprecated
    ``tf.test.is_gpu_available`` (which emits a deprecation notice on every
    call).

    Args:
        cuda_only: when truthy, a GPU only counts as available if this
            TensorFlow build was compiled with CUDA support; when falsy, any
            visible physical GPU device counts.

    Returns:
        None. Everything is reported through ``print``.
    """
    gpu_devices = tf.config.list_physical_devices('GPU')
    is_gpu_available = len(gpu_devices) > 0
    if cuda_only:
        # A visible GPU is only a CUDA GPU if TensorFlow itself was built with CUDA.
        is_gpu_available = is_gpu_available and tf.test.is_built_with_cuda()

    gpu_string = "CUDA" if cuda_only else "GPU"
    print(f"is {gpu_string} available: {is_gpu_available}")
    print(gpu_devices)
    print("Tensorflow version ", tf.__version__)
    print("Keras version ", tf.keras.__version__)



In [2]:
# Report the GPU / library environment this notebook is running in.
check_gpu(cuda_only=True)

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
is CUDA available: True
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Tensorflow version  2.6.0
Keras version  2.6.0


In [3]:
#@title Generate one-hot encoding

def generate_sequence(length, num_unique):
    """Return a list of ``length`` random integers drawn from ``[0, num_unique - 1]``.

    Args:
        length: number of integers to generate.
        num_unique: size of the value range; generated values are
            ``0 .. num_unique - 1`` inclusive.

    Returns:
        A Python list of ints produced with ``random.randint``.
    """
    tokens = []
    for _ in range(length):
        tokens.append(randint(0, num_unique - 1))
    return tokens

def one_hot_encode_sequence(sequence, num_unique):
    """One-hot encode a sequence of integer tokens.

    Args:
        sequence: iterable of integer values, each used as the index of the
            single ``1`` in its row.
        num_unique: width of every one-hot vector (number of distinct tokens).

    Returns:
        Integer ``np.ndarray`` of shape ``(len(sequence), num_unique)``. Row
        ``i`` is all zeros except for a ``1`` at column ``sequence[i]``. An
        empty sequence yields shape ``(0, num_unique)`` so the result is
        always 2-D.

    Raises:
        IndexError: if a value is outside the valid index range for
            ``num_unique`` columns.
    """
    tokens = list(sequence)
    # Preallocate the full matrix so the result is 2-D even with no tokens.
    encoding = np.zeros((len(tokens), num_unique), dtype=int)
    if tokens:
        # Row i gets its 1 at column tokens[i].
        encoding[np.arange(len(tokens)), tokens] = 1
    return encoding

def one_hot_decode_sequence(one_hot_encoded):
    """Decode one-hot (or probability) vectors back to integer tokens.

    Args:
        one_hot_encoded: iterable of vectors; each vector is decoded to the
            index of its largest element via ``np.argmax``.

    Returns:
        A list with one decoded index per input vector.
    """
    decoded = []
    for vector in one_hot_encoded:
        decoded.append(np.argmax(vector))
    return decoded

In [4]:
# Let's sanity-check the helpers with an encode -> decode round trip.
num_unique_tokens = 5

# Random sequence of 3 tokens drawn from the 5-token vocabulary.
seq = generate_sequence(length=3, num_unique=num_unique_tokens)

# Encode to one-hot rows, then decode back; decoded_seq should match seq.
one_hot_seq = one_hot_encode_sequence(sequence=seq, num_unique=num_unique_tokens)
decoded_seq = one_hot_decode_sequence(one_hot_encoded=one_hot_seq)

display(seq, one_hot_seq, decoded_seq)

[2, 4, 0]

array([[0, 0, 1, 0, 0],
       [0, 0, 0, 0, 1],
       [1, 0, 0, 0, 0]])

[2, 4, 0]

In [5]:
def get_pair(n_in, n_out, n_unique, verbose=False):
    """Create one (feature, label) training sample for the echo task.

    The input is a random sequence of ``n_in`` tokens. The target keeps the
    first ``n_out`` input tokens and fills the remaining ``n_in - n_out``
    positions with the token ``0``.

    Args:
        n_in: number of timesteps in the input sequence.
        n_out: number of leading input tokens copied into the target.
        n_unique: vocabulary size, i.e. width of each one-hot vector.
        verbose: when true, print the shapes of the generated arrays.

    Returns:
        Tuple ``(X, y)`` of one-hot encoded arrays, each reshaped to 3-D with
        a leading batch dimension of 1.
    """
    source_tokens = generate_sequence(n_in, n_unique)
    # Echo the first n_out tokens, then zero-pad.
    target_tokens = source_tokens[:n_out] + [0] * (n_in - n_out)

    batched = []
    for tokens in (source_tokens, target_tokens):
        one_hot = one_hot_encode_sequence(tokens, n_unique)
        # The LSTM expects 3-D input, so prepend a batch axis of size 1.
        batched.append(one_hot.reshape((1, one_hot.shape[0], one_hot.shape[1])))
    X, y = batched

    if verbose:
        print('Generated seq as follows')
        print(f'X shape {X.shape}, y shape {y.shape}')
    return X, y

In [6]:
#@title configure the problem
# With 4 input timesteps and 2 output timesteps, the target keeps only the
# first 2 input values; the remaining positions are filled with zeros.

n_timesteps_in = 4   # number of tokens in each input sample
n_features = 10      # length of each one-hot encoded vector
n_timesteps_out = 2  # number of leading input tokens echoed in the target

X, y = get_pair(n_in=n_timesteps_in, n_out=n_timesteps_out,
                n_unique=n_features, verbose=True)


Generated seq as follows
X shape (1, 4, 10), y shape (1, 4, 10)


In [7]:
# Show the raw one-hot arrays, then the same sample decoded back to tokens.
display(X, y)
display(list(map(one_hot_decode_sequence, X)))
display(list(map(one_hot_decode_sequence, y)))


array([[[1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]]])

array([[[1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]]])

[[0, 6, 1, 2]]

[[0, 6, 0, 0]]

In [34]:
# input_layer = Input( shape=(n_timesteps_in, n_features) )
# lstm_layer = LSTM(num_LSTM_cells, return_state = True)
# output_layer = lstm_layer(input_layer)
# model1 = Model(inputs=input_layer, outputs = output_layer)
# model1.compile(loss='categorical_crossentropy', optimizer='adam',
#                metrics=['accuracy'])
# model1.summary()

num_LSTM_cells = 100

# Softmax classifier over the n_features tokens; TimeDistributed applies it
# to every timestep independently.
dense_layer = Dense(n_features, activation='softmax')

model = Sequential([
    Input(shape=(n_timesteps_in, n_features)),
    # Encoder: without return_sequences only the final hidden state is
    # returned, one value per LSTM unit -> (batch, num_LSTM_cells).
    LSTM(num_LSTM_cells),
    # Repeat that single vector once per timestep so the next LSTM
    # receives a sequence again -> (batch, n_timesteps_in, num_LSTM_cells).
    RepeatVector(n_timesteps_in),
    # Decoder: return_sequences=True emits the hidden state at every timestep.
    LSTM(num_LSTM_cells, return_sequences=True),
    TimeDistributed(dense_layer),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 100)               44400     
_________________________________________________________________
repeat_vector_2 (RepeatVecto (None, 4, 100)            0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 4, 100)            80400     
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 10)             1010      
Total params: 125,810
Trainable params: 125,810
Non-trainable params: 0
_________________________________________________________________


In [24]:
# Train `model` on freshly generated random samples.
#
# All samples are generated up front and passed to a single fit() call with
# batch_size=1. That performs the same number of gradient steps (one per
# sample) as calling fit() once per sample, but pays the per-call setup cost
# only once and prints one progress bar instead of one per sample.
total_epochs = 100  # number of random training samples (one gradient step each)

train_pairs = [get_pair(n_timesteps_in, n_timesteps_out, n_features)
               for _ in range(total_epochs)]
# Each pair holds arrays with a leading batch axis of 1; stack along that axis.
X_train = np.vstack([features for features, _ in train_pairs])
y_train = np.vstack([labels for _, labels in train_pairs])

model.fit(X_train, y_train, batch_size=1, epochs=1, verbose=1)



In [30]:
# evaluate lstm: exact-match accuracy over freshly generated random samples
total = 100
correct = 0
for _ in range(total):
    X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
    yhat = model.predict(X, verbose=0)
    # A sample only counts when every timestep decodes to the expected token.
    if not np.array_equal(one_hot_decode_sequence(y[0]), one_hot_decode_sequence(yhat[0])):
        continue
    correct += 1
print("Accuracy: %.2f%%" % (float(correct)/float(total)*100.0))


Accuracy: 41.00%


In [32]:
# Print a few decoded examples: input, expected target and model prediction.
for _ in range(10):
    X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
    yhat = model.predict(X, verbose=0)
    for label, batch in (('Input', X), ("Expected", y), ("Actual", yhat)):
        # Each batch holds a single sample; decode it back to integer tokens.
        print(label, one_hot_decode_sequence(batch[0]))
    print("")

Input [9, 5, 1, 8]
Expected [9, 5, 0, 0]
Actual [9, 9, 0, 0]

Input [6, 4, 9, 1]
Expected [6, 4, 0, 0]
Actual [6, 4, 0, 0]

Input [0, 1, 3, 9]
Expected [0, 1, 0, 0]
Actual [1, 0, 0, 0]

Input [8, 6, 2, 6]
Expected [8, 6, 0, 0]
Actual [8, 6, 0, 0]

Input [2, 1, 2, 8]
Expected [2, 1, 0, 0]
Actual [2, 2, 0, 0]

Input [7, 4, 2, 1]
Expected [7, 4, 0, 0]
Actual [7, 7, 0, 0]

Input [0, 2, 4, 0]
Expected [0, 2, 0, 0]
Actual [2, 0, 0, 0]

Input [0, 4, 7, 1]
Expected [0, 4, 0, 0]
Actual [7, 0, 0, 0]

Input [1, 4, 8, 5]
Expected [1, 4, 0, 0]
Actual [1, 4, 0, 0]

Input [3, 3, 5, 5]
Expected [3, 3, 0, 0]
Actual [3, 3, 0, 0]



# Now define a model where both LSTM layers return a sequence

In [35]:
num_LSTM_cells = 100

# Softmax classifier over the n_features tokens; TimeDistributed applies it
# to every timestep independently.
dense_layer = Dense(n_features, activation='softmax')

model2 = Sequential([
    Input(shape=(n_timesteps_in, n_features)),
    # Both LSTM layers use return_sequences=True, so each emits its hidden
    # state at every input timestep and no RepeatVector is needed.
    LSTM(num_LSTM_cells, return_sequences=True),
    LSTM(num_LSTM_cells, return_sequences=True),
    TimeDistributed(dense_layer),
])
model2.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model2.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 4, 100)            44400     
_________________________________________________________________
lstm_6 (LSTM)                (None, 4, 100)            80400     
_________________________________________________________________
time_distributed_2 (TimeDist (None, 4, 10)             1010      
Total params: 125,810
Trainable params: 125,810
Non-trainable params: 0
_________________________________________________________________


In [43]:
# Train `model2` on freshly generated random samples.
#
# All samples are generated up front and passed to a single fit() call with
# batch_size=1. That performs the same number of gradient steps (one per
# sample) as calling fit() once per sample, but pays the per-call setup cost
# only once and prints one progress bar instead of one per sample.
total_epochs = 1500  # number of random training samples (one gradient step each)

train_pairs = [get_pair(n_timesteps_in, n_timesteps_out, n_features)
               for _ in range(total_epochs)]
# Each pair holds arrays with a leading batch axis of 1; stack along that axis.
X_train = np.vstack([features for features, _ in train_pairs])
y_train = np.vstack([labels for _, labels in train_pairs])

model2.fit(X_train, y_train, batch_size=1, epochs=1, verbose=1)



In [46]:
# Exact-match accuracy of model2 over freshly generated random samples.
total = 500
correct = 0
for _ in range(total):
    X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
    yhat = model2.predict(X, verbose=0)
    # A sample only counts when every timestep decodes to the expected token.
    if not np.array_equal(one_hot_decode_sequence(y[0]), one_hot_decode_sequence(yhat[0])):
        continue
    correct += 1
print("Accuracy: %.2f%%" % (float(correct)/float(total)*100.0))

Accuracy: 100.00%


In [47]:
# Print a few decoded examples: input, expected target and model2 prediction.
for _ in range(10):
    X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
    yhat = model2.predict(X, verbose=0)
    for label, batch in (('Input', X), ("Expected", y), ("Actual", yhat)):
        # Each batch holds a single sample; decode it back to integer tokens.
        print(label, one_hot_decode_sequence(batch[0]))
    print("")

Input [3, 1, 3, 4]
Expected [3, 1, 0, 0]
Actual [3, 1, 0, 0]

Input [5, 6, 7, 0]
Expected [5, 6, 0, 0]
Actual [5, 6, 0, 0]

Input [3, 7, 2, 9]
Expected [3, 7, 0, 0]
Actual [3, 7, 0, 0]

Input [3, 0, 0, 1]
Expected [3, 0, 0, 0]
Actual [3, 0, 0, 0]

Input [2, 5, 7, 3]
Expected [2, 5, 0, 0]
Actual [2, 5, 0, 0]

Input [4, 1, 0, 2]
Expected [4, 1, 0, 0]
Actual [4, 1, 0, 0]

Input [4, 8, 8, 8]
Expected [4, 8, 0, 0]
Actual [4, 8, 0, 0]

Input [9, 2, 9, 7]
Expected [9, 2, 0, 0]
Actual [9, 2, 0, 0]

Input [7, 3, 6, 9]
Expected [7, 3, 0, 0]
Actual [7, 3, 0, 0]

Input [4, 9, 3, 9]
Expected [4, 9, 0, 0]
Actual [4, 9, 0, 0]

