In [1]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Dense, Dropout, SimpleRNN, RepeatVector
from tensorflow.keras.callbacks import EarlyStopping, LambdaCallback
import tensorflow as tf
from termcolor import colored

In [2]:
# Checking if GPU available
physical_devices = tf.config.experimental.list_physical_devices("GPU")
tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [3]:
chars_for_add = '0123456789+'
num_features = len(chars_for_add)
char_to_index = dict((c, i) for i, c in enumerate(chars_for_add))
index_to_char = dict((i, c) for i, c in enumerate(chars_for_add))

In [21]:
def generate_samples_for_add():
    first = np.random.randint(0,100)
    second = np.random.randint(0,100)
    sample = str(first) + '+' + str(second)
    label = str(first+second)
    return sample, label
generate_samples_for_add()

('74+84', '158')

In [22]:
hidden_units = 128
max_time_steps = 7


    # Simple RNN
"""Fully-connected RNN where output is to be fed back to input.
    units: Positive integer, dimensionality of the output space.
    activation: Activation function to use. Default: hyperbolic 
    tangent (tanh). If you pass None, no activation is applied (ie.
    "linear" activation: a(x) = x).
    return_sequences: Boolean. Whether to return the last output in
    the output sequence, or the full sequence. Default: False."""

    # Repeat Vector
"""Repeats the input max_time_steps times"""

    # Time distributed layer
"""This wrapper allows to apply a layer to every temporal slice of
    an input.
    The input should be at least 3D, and the dimension of index one 
    will be considered to be the temporal dimension."""

model = Sequential([
    SimpleRNN(hidden_units, input_shape=(None, num_features)),
    
    RepeatVector(max_time_steps),
    
    SimpleRNN(hidden_units, return_sequences=True),
    
    TimeDistributed(Dense(num_features, activation='softmax'))
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_2 (SimpleRNN)     (None, 128)               17920     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 7, 128)            0         
_________________________________________________________________
simple_rnn_3 (SimpleRNN)     (None, 7, 128)            32896     
_________________________________________________________________
time_distributed_1 (TimeDist (None, 7, 11)             1419      
Total params: 52,235
Trainable params: 52,235
Non-trainable params: 0
_________________________________________________________________


In [23]:
def vectorize_sample(sample, label):
    x = np.zeros((max_time_steps, num_features))
    y = np.zeros((max_time_steps, num_features))
    diff_x = max_time_steps - len(sample)
    diff_y = max_time_steps - len(label)
    for i, c in enumerate(sample):
        x[i+diff_x, char_to_index[c]] = 1
    for i in range(diff_x):
        x[i, char_to_index['0']] = 1
    for i, c in enumerate(label):
        y[i+diff_y, char_to_index[c]] = 1
    for i in range(diff_x):
        y[i, char_to_index['0']] = 1
    return x,y    
        
e, l = generate_samples_for_add()
print(e, l)
x, y = vectorize_sample(e, l)
print(x.shape, y.shape)

6+30 36
(7, 11) (7, 11)


In [24]:
def devectorize_sample(sample):
    res = [index_to_char[np.argmax(vec)] for i, vec in enumerate(sample)]
    return ''.join(res)

devectorize_sample(x)

'0006+30'

In [25]:
devectorize_sample(y)

'0000036'

In [26]:
# Dataset
def create_dataset(num_samples=5000):
    x = np.zeros((num_samples, max_time_steps, num_features))
    y = np.zeros((num_samples, max_time_steps, num_features))
    for i in range(num_samples):
        e, l = generate_samples_for_add()
        e, l = vectorize_sample(e, l)
        x[i] = e
        y[i] = l
    return x, y    

x, y = create_dataset()
print(x.shape, y.shape)
devectorize_sample(x[78])


(5000, 7, 11) (5000, 7, 11)


'0090+65'

In [27]:
devectorize_sample(y[78])


'0000155'

In [28]:
# Model Training
# Callbacks: utilities called at certain points during model training.
# on_epoch_begin	called at the beginning of every epoch.
# on_epoch_end	called at the end of every epoch.
l_cb = LambdaCallback(
    on_epoch_end = lambda e, l:print('{:.2f}'.format(l['val_accuracy']), end=' _ ')
)

# EarlyStopping: Stop training when a monitored metric has stopped improving.
# monitor	Quantity to be monitored.
# patience	Number of epochs with no improvement after which training will be stopped.
es_cb = EarlyStopping(monitor='val_loss', patience=20)


# verbose	0 or 1. Verbosity mode. 0 = silent, 1 = progress bar.
model.fit(x, y, epochs=500, batch_size=256, validation_split=0.2,
         verbose=True , callbacks=[es_cb, l_cb])



Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500


Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 79/500
Epoch 80/500
Epoch 81/500
Epoch 82/500
Epoch 83/500
Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500


Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500
Epoch 122/500
Epoch 123/500
Epoch 124/500
Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500
Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155/500
Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 161/500
Epoch 162/500
Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500


Epoch 172/500
Epoch 173/500


<tensorflow.python.keras.callbacks.History at 0x7faa647ab390>

In [29]:
x_test, y_test = create_dataset(10)
preds = model.predict(x_test)

for i, pred in enumerate(preds):
    y = devectorize_sample(y_test[i])
    y_pred = devectorize_sample(pred)
    col = 'green'
    if y!=y_pred:
        col='red'
    out = 'Input: '+devectorize_sample(x_test[i])+ ' Out:'+y+' Pred: '+y_pred
    print(colored(out, col))

[31mInput: 0035+20 Out:0000055 Pred: 0000155[0m
[31mInput: 00025+9 Out:0000034 Pred: 0000134[0m
[31mInput: 0038+37 Out:0000075 Pred: 0002175[0m
[31mInput: 0001+37 Out:0000038 Pred: 0000238[0m
[31mInput: 0013+85 Out:0000098 Pred: 0001198[0m
[31mInput: 0028+19 Out:0000047 Pred: 0001247[0m
[31mInput: 0070+47 Out:0000117 Pred: 0002117[0m
[31mInput: 0004+96 Out:0000100 Pred: 0003100[0m
[31mInput: 0047+11 Out:0000058 Pred: 0000458[0m
[31mInput: 0051+79 Out:0000130 Pred: 0001130[0m
