# Making Your Model Learn Addition!
## Introduction

Given the string "54+7", the model should return a prediction: "61".

In [1]:
import numpy as np
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Dense, Dropout, SimpleRNN, RepeatVector
from tensorflow.keras.callbacks import EarlyStopping, LambdaCallback

from termcolor import colored

# Show the TensorFlow version the notebook was written against next to the
# version that is actually installed, so version drift is visible in the output.
print('Tested with tensorflow version 2.0.1')
print('Using tensorflow version:', tf.__version__)

Tested with tensorflow version 2.0.1
Using tensorflow version: 2.2.0


## Generate Data

In [0]:
# Vocabulary: the ten digits plus the '+' sign. The position of a character in
# this string is used as its one-hot feature index.
all_chars = '0123456789+'

In [3]:
# One one-hot feature per vocabulary character.
num_features = len(all_chars)

# Lookup tables in both directions between a character and its feature index.
index_to_char = dict(enumerate(all_chars))
char_to_index = {char: idx for idx, char in index_to_char.items()}

print('Number of features:', num_features)

Number of features: 11


In [4]:
def generate_data():
    """Create one random addition problem and its answer, both as strings.

    Two integers are drawn independently from 0 to 99 (inclusive) using
    NumPy's global random state.

    Returns:
        tuple: ``(example, label)`` where ``example`` is ``'<a>+<b>'`` and
        ``label`` is the decimal string of ``a + b``.
    """
    # Draw the operands one after the other, first operand first.
    operands = [np.random.randint(low=0, high=100) for _ in range(2)]
    example = '+'.join(str(number) for number in operands)
    label = str(operands[0] + operands[1])
    return example, label

# Draw one sample to show the (example, label) string format.
generate_data()

('45+97', '142')

## Create the Model

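Consider these two reviews:

Review 1: This movie is not terrible at all.

Review 2: This movie is pretty decent.

Both reviews are positive, but the first one only reads as positive when its words are taken in order: "not" reverses the meaning of "terrible". The same is true for our task. The strings "12+3" and "21+3" contain exactly the same characters, yet their sums differ, so the model has to read its input as an ordered sequence. This is what a recurrent neural network (RNN) does: it processes one time step after another and carries a hidden state forward.

The model below is a small encoder-decoder. The first `SimpleRNN` reads the input characters and compresses them into a single vector, `RepeatVector` copies that vector once per output time step, and the second `SimpleRNN` followed by a `TimeDistributed` dense softmax layer predicts one output character per time step.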

In [5]:
hidden_units = 128
# Length of every input and output sequence. The longest example that
# generate_data can produce ('99+99') has 5 characters.
max_time_steps = 5

model = Sequential()
# Encoder: reads the one-hot input sequence and outputs only its final state.
model.add(SimpleRNN(hidden_units, input_shape=(None, num_features)))
# Present that final state to the decoder once per output time step.
model.add(RepeatVector(max_time_steps))
# Decoder: produces one hidden vector per output time step.
model.add(SimpleRNN(hidden_units, return_sequences=True))
# Per-step softmax over the vocabulary characters.
model.add(TimeDistributed(Dense(num_features, activation='softmax')))

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn (SimpleRNN)       (None, 128)               17920     
_________________________________________________________________
repeat_vector (RepeatVector) (None, 5, 128)            0         
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 5, 128)            32896     
_________________________________________________________________
time_distributed (TimeDistri (None, 5, 11)             1419      
Total params: 52,235
Trainable params: 52,235
Non-trainable params: 0
_________________________________________________________________


## Vectorize and De-Vectorize Data


In [6]:
def vectorize_example(example, label):
    """One-hot encode an example/label pair into fixed-size arrays.

    Each string is right-aligned in a ``(max_time_steps, num_features)``
    array; the unused leading time steps are filled with the character '0'.

    Args:
        example: input text such as ``'54+7'``.
        label: answer text such as ``'61'``.

    Returns:
        tuple: ``(x, y)``, the encoded example and the encoded label, each of
        shape ``(max_time_steps, num_features)``.

    Raises:
        ValueError: if either string is longer than ``max_time_steps``.
        KeyError: if a character is missing from ``char_to_index``.
    """
    def _one_hot_left_padded(text):
        # Encode one string, left-padding it with '0' to max_time_steps rows.
        pad = max_time_steps - len(text)
        if pad < 0:
            # Without this check the negative offset would wrap around and
            # silently write characters into the wrong time steps.
            raise ValueError(
                'text {!r} has {} characters but max_time_steps is {}'.format(
                    text, len(text), max_time_steps))
        encoded = np.zeros((max_time_steps, num_features))
        for step, char in enumerate('0' * pad + text):
            encoded[step, char_to_index[char]] = 1
        return encoded

    return _one_hot_left_padded(example), _one_hot_left_padded(label)

# Encode one freshly generated pair and confirm that both arrays have the
# fixed (max_time_steps, num_features) shape. x and y are reused by the
# decoding cells that follow.
e, l = generate_data()
print('Text Example and Label:', e, l)
x, y = vectorize_example(e, l)
print('Vectorized Example and Label Shapes:', 
      x.shape, y.shape)

Text Example and Label: 53+68 121
Vectorized Example and Label Shapes: (5, 11) (5, 11)


In [7]:
def devectorize_example(example):
    """Decode a sequence of per-step score vectors back into text.

    For every row of ``example`` the index of its largest entry is looked up
    in ``index_to_char``; the resulting characters are joined in row order.
    This works for one-hot rows as well as for rows of softmax outputs.

    Args:
        example: iterable of per-time-step vectors.

    Returns:
        str: one character per row of ``example``.
    """
    chars = []
    for step_vector in example:
        chars.append(index_to_char[np.argmax(step_vector)])
    return ''.join(chars)

# Round trip: decode the vectorized example back into text.
devectorize_example(x)

'53+68'

In [8]:
# Decode the vectorized label; shorter labels show up left-padded with '0'.
devectorize_example(y)

'00121'

## Create Dataset

In [9]:
def create_dataset(num_examples=2000):
    """Generate and vectorize a batch of random addition problems.

    Args:
        num_examples: number of (example, label) pairs to generate.

    Returns:
        tuple: ``(x_train, y_train)``, each of shape
        ``(num_examples, max_time_steps, num_features)``.
    """
    batch_shape = (num_examples, max_time_steps, num_features)
    x_train = np.zeros(batch_shape)
    y_train = np.zeros(batch_shape)

    # Fill the pre-allocated arrays one freshly generated pair at a time.
    for row in range(num_examples):
        x_train[row], y_train[row] = vectorize_example(*generate_data())

    return x_train, y_train

# Build the training set with the default number of examples and check its shape.
x_train, y_train = create_dataset()
print(x_train.shape, y_train.shape)

(2000, 5, 11) (2000, 5, 11)


In [10]:
# Spot check: decode the first training input.
devectorize_example(x_train[0])

'50+36'

In [11]:
# Spot check: decode the matching label of the first training input.
devectorize_example(y_train[0])

'00086'

## Training the Model


In [12]:
# Compact progress log: print only the validation accuracy after every epoch.
simple_logger = LambdaCallback(
    on_epoch_end=lambda epoch, logs: print('{:.2f}'.format(logs['val_accuracy']), end=' _ ')
)
# Stop once val_loss has not improved for 10 consecutive epochs. Restoring the
# best weights keeps the model from ending up with the weights of the last
# (worse) epochs that were only run to satisfy the patience window.
early_stopping = EarlyStopping(monitor='val_loss', patience=10,
                               restore_best_weights=True)

# 20% of the training data is held out for validation. verbose=0 silences the
# built-in Keras log because simple_logger already reports progress.
model.fit(x_train, y_train, epochs=500, validation_split=0.2, verbose=0,
          callbacks=[simple_logger, early_stopping])

0.61 _ 0.62 _ 0.64 _ 0.67 _ 0.68 _ 0.69 _ 0.71 _ 0.72 _ 0.72 _ 0.76 _ 0.77 _ 0.75 _ 0.74 _ 0.78 _ 0.78 _ 0.79 _ 0.80 _ 0.79 _ 0.84 _ 0.84 _ 0.84 _ 0.87 _ 0.89 _ 0.90 _ 0.89 _ 0.90 _ 0.91 _ 0.90 _ 0.90 _ 0.92 _ 0.90 _ 0.92 _ 0.92 _ 0.91 _ 0.92 _ 0.94 _ 0.93 _ 0.92 _ 0.92 _ 0.94 _ 0.94 _ 0.95 _ 0.95 _ 0.95 _ 0.95 _ 0.96 _ 0.95 _ 0.95 _ 0.95 _ 0.94 _ 0.90 _ 0.88 _ 0.90 _ 0.94 _ 0.95 _ 0.96 _ 0.96 _ 0.96 _ 0.96 _ 0.96 _ 0.96 _ 0.96 _ 0.97 _ 0.97 _ 0.97 _ 0.97 _ 0.97 _ 0.97 _ 0.97 _ 0.97 _ 0.97 _ 0.97 _ 0.96 _ 0.97 _ 0.97 _ 0.97 _ 0.97 _ 0.97 _ 0.97 _ 0.97 _ 0.96 _ 0.97 _ 0.97 _ 0.97 _ 0.97 _ 0.97 _ 0.97 _ 0.97 _ 

<tensorflow.python.keras.callbacks.History at 0x7f09a03bdc50>

Create a test set and look at some predictions:

In [14]:
x_test, y_test = create_dataset(num_examples=20)
preds = model.predict(x_test)
full_seq_acc = 0

# A prediction only counts as correct when every character of the decoded
# output matches the decoded target.
for source, target, pred in zip(x_test, y_test, preds):
    pred_str = devectorize_example(pred)
    target_str = devectorize_example(target)
    source_str = devectorize_example(source)
    is_exact = pred_str == target_str
    # Each fully correct sequence contributes an equal share to the accuracy.
    full_seq_acc += 1/len(preds) * int(is_exact)
    report = 'Input: {}, Out: {}, Pred: {}'.format(source_str, target_str, pred_str)
    print(colored(report, 'green' if is_exact else 'red'))
print('\nFull sequence accuracy: {:.3f} %'.format(100 * full_seq_acc))

[32mInput: 33+44, Out: 00077, Pred: 00077[0m
[32mInput: 21+16, Out: 00037, Pred: 00037[0m
[32mInput: 08+56, Out: 00064, Pred: 00064[0m
[32mInput: 89+82, Out: 00171, Pred: 00171[0m
[32mInput: 049+4, Out: 00053, Pred: 00053[0m
[32mInput: 80+69, Out: 00149, Pred: 00149[0m
[32mInput: 71+39, Out: 00110, Pred: 00110[0m
[32mInput: 71+84, Out: 00155, Pred: 00155[0m
[32mInput: 76+75, Out: 00151, Pred: 00151[0m
[32mInput: 18+93, Out: 00111, Pred: 00111[0m
[32mInput: 67+59, Out: 00126, Pred: 00126[0m
[31mInput: 029+8, Out: 00037, Pred: 00036[0m
[32mInput: 55+57, Out: 00112, Pred: 00112[0m
[32mInput: 62+30, Out: 00092, Pred: 00092[0m
[32mInput: 20+55, Out: 00075, Pred: 00075[0m
[32mInput: 15+95, Out: 00110, Pred: 00110[0m
[32mInput: 34+39, Out: 00073, Pred: 00073[0m
[31mInput: 21+26, Out: 00047, Pred: 00048[0m
[32mInput: 74+90, Out: 00164, Pred: 00164[0m
[32mInput: 12+25, Out: 00037, Pred: 00037[0m

Full sequence accuracy: 90.000 %
