# Making Your Model Learn Addition!
## Task 1: Introduction

Given the string "54+7", the model should return a prediction: "61".

In [1]:
import numpy as np
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Dense, Dropout, SimpleRNN, RepeatVector
from tensorflow.keras.callbacks import EarlyStopping, LambdaCallback

from termcolor import colored

print('Tested with tensorflow version 2.0.1')
print('Using tensorflow version:', tf.__version__)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Tested with tensorflow version 2.0.1
Using tensorflow version: 1.14.0


## Task 2: Generate Data
___
Note: If you are starting the notebook from this task, you can run cells from all previous tasks in the kernel by going to the top menu and then selecting Kernel > Restart and Run All
___

In [2]:
# Vocabulary: the ten decimal digits followed by the plus sign.
all_chars = ''.join(str(digit) for digit in range(10)) + '+'

In [3]:
# The vocabulary size is also the width of every one-hot vector.
num_features = len(all_chars)

# Lookup tables in both directions between a character and its one-hot position.
char_to_index = {char: position for position, char in enumerate(all_chars)}
index_to_char = dict(enumerate(all_chars))

print('Number of features: {}'.format(num_features))

Number of features: 11


In [4]:
def generate_data():
    """Return one random addition problem and its answer, both as strings.

    Two operands are drawn independently with ``np.random.randint(0, 100)``
    (upper bound exclusive), so a result looks like ``('71+92', '163')``.
    """
    operands = [np.random.randint(0, 100) for _ in range(2)]
    example = '{}+{}'.format(*operands)
    label = str(sum(operands))
    return example, label

generate_data()

('71+92', '163')

In [5]:
hidden_units = 128
max_time_steps = 5

# Encoder-decoder stack, declared as a single layer list:
#   1. SimpleRNN reads the variable-length input and emits one vector.
#   2. RepeatVector copies that vector once per output time step.
#   3. SimpleRNN (return_sequences=True) produces one state per time step.
#   4. TimeDistributed(Dense) turns each state into a softmax over the vocabulary.
model = Sequential([
    SimpleRNN(hidden_units, input_shape=(None, num_features)),
    RepeatVector(max_time_steps),
    SimpleRNN(hidden_units, return_sequences=True),
    TimeDistributed(Dense(num_features, activation='softmax')),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn (SimpleRNN)       (None, 128)               17920     
_________________________________________________________________
repeat_vector (RepeatVector) (None, 5, 128)            0         
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 5, 128)            32896     
_________________________________________________________________
time_distributed (TimeDistri (None, 5, 11)             1419      
Total params: 52,235
Trainable params: 52,235
Non-trainable params: 0
_________________________________________________________________


## Vectorize and Devectorize

In [6]:
def _one_hot_padded(text):
    """One-hot encode `text`, left-padded with '0' to `max_time_steps` characters.

    Raises:
        ValueError: if `text` is longer than `max_time_steps`. Without this
            check the padding offset would be negative and NumPy's negative
            indexing would silently write rows at the wrong time steps.
        KeyError: if `text` contains a character missing from `char_to_index`.
    """
    if len(text) > max_time_steps:
        raise ValueError(
            'Cannot vectorize {!r}: length {} exceeds max_time_steps={}'.format(
                text, len(text), max_time_steps))

    # Left-padding with '0' keeps the digits right-aligned.
    padded = text.rjust(max_time_steps, '0')

    encoded = np.zeros((max_time_steps, num_features))
    for step, char in enumerate(padded):
        encoded[step, char_to_index[char]] = 1
    return encoded


def vectorize_example(example, label):
    """Convert an (example, label) string pair into one-hot matrices.

    Both strings are left-padded with '0' so that each occupies exactly
    `max_time_steps` rows; every row is a one-hot vector of width
    `num_features`.

    Args:
        example: input expression string such as '10+63'.
        label: expected answer string such as '73'.

    Returns:
        Tuple ``(x, y)`` of arrays, each of shape
        ``(max_time_steps, num_features)``.

    Raises:
        ValueError: if either string is longer than `max_time_steps`.
        KeyError: if either string contains a character outside the vocabulary.
    """
    return _one_hot_padded(example), _one_hot_padded(label)

# Sanity check on one random sample: show the raw strings and the encoded shapes.
# x and y stay in the notebook namespace; the devectorize cells below read them.
e, l = generate_data()
print('Text Example and Label:', e, l)
x, y = vectorize_example(e, l)
print('Vectorized Example and Label Shapes:', x.shape, y.shape)

Text Example and Label: 10+63 73
Vectorized Example and Label Shapes: (5, 11) (5, 11)


In [7]:
def devectorize_example(example):
    """Decode a sequence of per-step score vectors back into a string.

    For each row of `example` the position of the largest value is looked up
    in `index_to_char`; the resulting characters are concatenated in order.
    """
    return ''.join(index_to_char[np.argmax(step_vector)] for step_vector in example)

devectorize_example(x)

'10+63'

In [8]:
devectorize_example(y)

'00073'

## Task 5: Create Dataset
___
Note: If you are starting the notebook from this task, you can run cells from all previous tasks in the kernel by going to the top menu and then selecting Kernel > Restart and Run All
___

In [9]:
def create_dataset(num_examples=2000):
    """Generate `num_examples` random addition problems in one-hot form.

    Returns:
        Tuple ``(x_train, y_train)``; both arrays have shape
        ``(num_examples, max_time_steps, num_features)``.
    """
    batch_shape = (num_examples, max_time_steps, num_features)
    x_train = np.zeros(batch_shape)
    y_train = np.zeros(batch_shape)

    for row in range(num_examples):
        # Draw one problem and store its encoded input/target in the same row.
        x_train[row], y_train[row] = vectorize_example(*generate_data())

    return x_train, y_train

# Build the training set with the default number of examples and show both shapes.
x_train, y_train = create_dataset()
print(x_train.shape, y_train.shape)

(2000, 5, 11) (2000, 5, 11)


In [10]:
devectorize_example(x_train[0])

'32+19'

In [11]:
devectorize_example(y_train[0])

'00051'

## Task 6: Training the Model
___
Note: If you are starting the notebook from this task, you can run cells from all previous tasks in the kernel by going to the top menu and then selecting Kernel > Restart and Run All
___

In [12]:
# Progress callback: print each epoch's validation loss on a single line,
# separated by ' _ '. `end` must be an argument of print(), not of str.format(),
# where it was silently ignored and every loss ended up on its own line.
l_cb = LambdaCallback(
    on_epoch_end=lambda epoch, logs: print('{}'.format(logs['val_loss']), end=' _ ')
)
# Stop once the validation loss has not improved for 10 consecutive epochs.
e_cb = EarlyStopping(monitor='val_loss', patience=10)

# Hold out the last 20% of the samples for validation; Keras' own progress
# output is disabled because the callback above reports the loss.
history = model.fit(x_train, y_train, epochs=500, validation_split=0.2, verbose=False,
         callbacks=[l_cb, e_cb])

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
1.0149689245223998
0.9790207195281982
0.9440216112136841
0.9029296469688416
0.8666547536849976
0.82814373254776
0.7735479664802551
0.7631277298927307
0.7341361570358277
0.7140837836265564
0.7072999215126038
0.6679725408554077
0.6238920617103577
0.5758296585083008
0.5242422866821289
0.4909771227836609
0.46039018392562864
0.4326554584503174
0.43561357021331787
0.3807370412349701
0.3531480944156647
0.3527732765674591
0.31347395658493044
0.29729501605033876
0.2759977352619171
0.3099179255962372
0.26500235438346864
0.2555989038944244
0.23673492133617402
0.23437965750694276
0.2500717556476593
0.2234712892770767
0.24943262934684754
0.2782700777053833
0.23056010842323305
0.2085524970293045
0.16805161654949188
0.1682869738340378
0.1797723078727722
0.15844111144542694
0.15074337005615235
0.15331506013870239
0.15522752642631532
0.15832903742790222
0.15838591903448104
0.17606188148260116
0.272173582315445

Create a test set and look at some predictions:

In [13]:
# Draw a small fresh sample and compare the decoded predictions with the targets.
x_test, y_test = create_dataset(num_examples=20)
preds = model.predict(x_test)
full_seq_acc = 0

for pred, target, inputs in zip(preds, y_test, x_test):
    pred_str = devectorize_example(pred)
    y_test_str = devectorize_example(target)
    x_test_str = devectorize_example(inputs)

    # A prediction only counts when the whole decoded string matches.
    is_match = pred_str == y_test_str
    col = 'green' if is_match else 'red'
    full_seq_acc += 1/len(preds) * int(is_match)

    out = 'Input: {}, Out: {}, Pred: {}'.format(x_test_str, y_test_str, pred_str)
    print(colored(out, col))

print('\nFull sequence accuracy: {:.3f} %'.format(100 * full_seq_acc))

[32mInput: 95+26, Out: 00121, Pred: 00121[0m
[32mInput: 76+59, Out: 00135, Pred: 00135[0m
[32mInput: 29+53, Out: 00082, Pred: 00082[0m
[32mInput: 57+33, Out: 00090, Pred: 00090[0m
[32mInput: 27+36, Out: 00063, Pred: 00063[0m
[32mInput: 91+26, Out: 00117, Pred: 00117[0m
[32mInput: 02+50, Out: 00052, Pred: 00052[0m
[32mInput: 37+85, Out: 00122, Pred: 00122[0m
[32mInput: 07+54, Out: 00061, Pred: 00061[0m
[32mInput: 35+92, Out: 00127, Pred: 00127[0m
[32mInput: 51+14, Out: 00065, Pred: 00065[0m
[32mInput: 03+13, Out: 00016, Pred: 00016[0m
[32mInput: 55+73, Out: 00128, Pred: 00128[0m
[32mInput: 32+45, Out: 00077, Pred: 00077[0m
[32mInput: 81+46, Out: 00127, Pred: 00127[0m
[32mInput: 42+47, Out: 00089, Pred: 00089[0m
[32mInput: 26+12, Out: 00038, Pred: 00038[0m
[32mInput: 86+88, Out: 00174, Pred: 00174[0m
[32mInput: 22+91, Out: 00113, Pred: 00113[0m
[32mInput: 85+12, Out: 00097, Pred: 00097[0m

Full sequence accuracy: 100.000 %
