# **Uploads**

---

Upload the necessary files (`data_generator.py`).

In [1]:
# Colab-only: upload files from the local machine into the runtime so that
# `data_generator` can be imported further down.
# NOTE(review): the recorded output shows the upload being saved as
# "data_generator (1).py", which suggests an older copy was already present and
# is the one that the later import picks up -- verify before re-running.
from google.colab import files
files.upload()

Saving data_generator.py to data_generator (1).py


{'data_generator.py': b'import random\nimport cv2\nimport numpy as np\n\nfrom imgaug import augmenters as iaa\nfrom tensorflow.keras import datasets\n\n(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()\n\ndef sometimes(aug): return iaa.Sometimes(0.5, aug)\n\nseq = iaa.Sequential([\n    iaa.Affine(\n        scale={"x": (0.8, 1), "y": (0.8, 1)},\n        translate_percent={"x": (-0.1, 0.1), "y": (-0.1, 0.1)},\n        rotate=(-15, 15),\n        shear=(-5, 5),\n        cval=(0, 0),\n        mode=\'constant\'\n    )\n])\n\n\ndef crop_number(number):\n    """\n        Crops a MNIST digit to its containing bounding box with some random noise.\n    """\n    vsum = np.sum(number, axis=0)\n    vsum[vsum > 0] = 1\n    vdif = np.diff(vsum)\n    vdif[vdif > 0] = 1\n    xs = np.argwhere(vdif > 0).ravel()\n\n    random_cut1 = np.random.randint(-1, 3)\n    random_cut2 = np.random.randint(-1, 3)\n\n    try:\n        cropped_number = number[0:28, xs[0] - random_cut1:xs[1] + random_cut2]

# **Imports**

---

Import necessary libraries and set constant variables

In [0]:
%matplotlib inline
import cv2
import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score

from data_generator import training_generator, test_generator

# Number of distinct sums (classes); generate_balanced_data indexes its per-class
# counters by the integer sum, so valid sums are 0 .. NUM_CLASSES - 1.
# NOTE(review): 254 + 254 + 1 presumably reflects operands in 0..254 -- confirm
# against data_generator.
NUM_CLASSES = 254 + 254 + 1 # number of classes
# Input expressions are left-padded with spaces to X_ROWS characters (one
# one-hot row per character).
X_ROWS = 7
# Output sums are left-padded with spaces to Y_ROWS characters.
Y_ROWS = 3
# Index 0 (the space) is the padding symbol in both alphabets; the decoders skip it.
CHARS_X = ' +0123456789' # characters present in the input
CHARS_Y = ' 0123456789' # characters present in the output

# **Utils**

---

Utility functions

In [0]:
def generate_balanced_data(no_ex_per_class=100, data_type='train', min_fill_pct=90):
  """Collect addition examples with at most `no_ex_per_class` samples per sum.

  Samples are drawn one at a time from the selected generator. A sample is kept
  only while its class (the integer value of the sum) still has fewer than
  `no_ex_per_class` kept examples. Drawing stops once the kept examples reach
  `min_fill_pct` percent of `NUM_CLASSES * no_ex_per_class`; filling every
  class completely takes too long because some sums are drawn rarely.

  Args:
    no_ex_per_class: maximum number of kept examples per class; must be > 0.
    data_type: 'train' to draw from `training_generator`, 'test' to draw from
      `test_generator`.
    min_fill_pct: whole-number percentage of the full balanced size at which
      drawing stops (default 90, the value previously hard-coded).

  Returns:
    (X, y): two numpy arrays of strings of equal length. X holds the
    expressions formatted as '<a>+<b>', y holds the sums as strings.

  Raises:
    ValueError: if `data_type` is not 'train' or 'test', or if
      `no_ex_per_class` is not positive.
  """
  if data_type == 'train':
    generator = training_generator(batch_size=1)
  elif data_type == 'test':
    generator = test_generator(batch_size=1)
  else:
    raise ValueError('Invalid data type!')

  if no_ex_per_class <= 0:
    # A non-positive cap would divide by zero or never reach the threshold.
    raise ValueError('no_ex_per_class must be a positive integer!')

  target = NUM_CLASSES * no_ex_per_class  # size of a perfectly balanced set
  classes = NUM_CLASSES * [0]             # kept examples per sum value
  kept = 0                                # running total, equals sum(classes)
  per = 0                                 # last progress percentage printed
  X = []
  y = []

  while True:
    # The first yielded element is not needed here (presumably the rendered
    # image -- TODO confirm against data_generator).
    _, numbers, numbers_sum = next(generator)
    nr = '{0}+{1}'.format(numbers[0], numbers[1])
    s = str(numbers_sum[0][0])
    label = int(s)  # assumes 0 <= sum < NUM_CLASSES

    if classes[label] < no_ex_per_class:
      classes[label] += 1
      kept += 1
      X.append(nr)
      y.append(s)

      pct = int((kept * 100) / target)
      if pct > per:
        per = pct
        print('--{0}%/100%--'.format(per))

    if int((kept * 100) / target) >= min_fill_pct:
      break

  X = np.array(X)
  y = np.array(y)

  return X, y

def encode_x_str(s):
  """One-hot encode an input expression such as '73+152'.

  The string is left-padded with spaces to `X_ROWS` characters; row i of the
  result is the one-hot vector of character i over the alphabet `CHARS_X`.

  Args:
    s: expression string of at most `X_ROWS` characters, all from `CHARS_X`.

  Returns:
    numpy array of shape (X_ROWS, len(CHARS_X)) with exactly one 1 per row.

  Raises:
    ValueError: if `s` is longer than `X_ROWS` or contains a character that is
      not in `CHARS_X`. Previously such input was silently truncated, or
      encoded as the last symbol because `str.find` returns -1.
  """
  if len(s) > X_ROWS:
    raise ValueError(
        'Input {0!r} is longer than {1} characters'.format(s, X_ROWS))

  padded = ' ' * (X_ROWS - len(s)) + s
  x = np.zeros((X_ROWS, len(CHARS_X)))

  for i in range(X_ROWS):
    col = CHARS_X.find(padded[i])
    if col < 0:
      raise ValueError(
          'Unsupported character {0!r} in input {1!r}'.format(padded[i], s))
    x[i, col] = 1

  return x

def encode_y_str(s):
  """One-hot encode a sum string such as '225'.

  The string is left-padded with spaces to `Y_ROWS` characters; row i of the
  result is the one-hot vector of character i over the alphabet `CHARS_Y`.

  Args:
    s: sum string of at most `Y_ROWS` characters, all from `CHARS_Y`.

  Returns:
    numpy array of shape (Y_ROWS, len(CHARS_Y)) with exactly one 1 per row.

  Raises:
    ValueError: if `s` is longer than `Y_ROWS` or contains a character that is
      not in `CHARS_Y`. Previously such input was silently truncated, or
      encoded as the last symbol because `str.find` returns -1.
  """
  if len(s) > Y_ROWS:
    raise ValueError(
        'Output {0!r} is longer than {1} characters'.format(s, Y_ROWS))

  padded = ' ' * (Y_ROWS - len(s)) + s
  y = np.zeros((Y_ROWS, len(CHARS_Y)))

  for i in range(Y_ROWS):
    col = CHARS_Y.find(padded[i])
    if col < 0:
      raise ValueError(
          'Unsupported character {0!r} in output {1!r}'.format(padded[i], s))
    y[i, col] = 1

  return y

def decode_x_str(x):
  """Turn an encoded input back into its expression string.

  For each of the first `X_ROWS` entries of `x` the position of the largest
  value selects a character of `CHARS_X`; position 0 (the padding space) is
  dropped, so the padding added by the encoder disappears.
  """
  winners = (np.argmax(x[row]) for row in range(X_ROWS))
  return ''.join(CHARS_X[idx] for idx in winners if idx != 0)

def decode_y_str(y):
  """Turn an encoded (or predicted) sum back into its string.

  For each of the first `Y_ROWS` entries of `y` the position of the largest
  value selects a character of `CHARS_Y`; position 0 (the padding space) is
  dropped. `y` only needs to be indexable per time step, so a list of
  per-step arrays works as well as a single array.
  """
  winners = (np.argmax(y[row]) for row in range(Y_ROWS))
  return ''.join(CHARS_Y[idx] for idx in winners if idx != 0)

def encode_x(X):
  """One-hot encode every expression in the array `X`.

  Applies `encode_x_str` to X[0] .. X[n-1] and stacks the results into a
  single numpy array (one encoded expression per leading index).
  """
  return np.array([encode_x_str(X[idx]) for idx in range(X.shape[0])])

def encode_y(y):
  """One-hot encode every sum in `y`, in two layouts.

  Args:
    y: numpy array of sum strings.

  Returns:
    (y_new, y_time_step):
      y_new is the stack of `encode_y_str` results, indexed
      [sample, time step, character].
      y_time_step holds the same values regrouped per time step, indexed
      [time step, sample, character]; it has `Y_ROWS` entries, one per model
      output. The number of steps follows `Y_ROWS` instead of a hard-coded 3.
  """
  y_new = np.array([encode_y_str(y[i]) for i in range(y.shape[0])])

  # One slice per output position, in order.
  y_time_step = [y_new[:, t, :] for t in range(Y_ROWS)]

  return y_new, np.array(y_time_step)

Generating the data

In [51]:
# Draw the three datasets as arrays of strings ('a+b' expressions and their sums).
# NOTE(review): validation and test are both drawn from the 'test' generator, so
# the two sets may share examples -- confirm this is acceptable.
# NOTE(review): no random seed is set in the visible cells, so the generated sets
# presumably differ between runs -- confirm.
X_train, y_train = generate_balanced_data(no_ex_per_class=100, data_type='train')
X_val, y_val = generate_balanced_data(no_ex_per_class=10, data_type='test')
X_test, y_test = generate_balanced_data(no_ex_per_class=10, data_type='test')

# One-hot encode inputs and targets. encode_y returns two layouts:
# [sample, step, char] (fed to the decoder as input during training) and
# [step, sample, char] (one target array per model output).
X_train_encoded = encode_x(X_train)
y_train_encoded, y_train_encoded_time_step = encode_y(y_train)
X_val_encoded = encode_x(X_val)
y_val_encoded, y_val_encoded_time_step = encode_y(y_val)
X_test_encoded = encode_x(X_test)
y_test_encoded, y_test_encoded_time_step = encode_y(y_test)

--1%/100%--
--2%/100%--
--3%/100%--
--4%/100%--
--5%/100%--
--6%/100%--
--7%/100%--
--8%/100%--
--9%/100%--
--10%/100%--
--11%/100%--
--12%/100%--
--13%/100%--
--14%/100%--
--15%/100%--
--16%/100%--
--17%/100%--
--18%/100%--
--19%/100%--
--20%/100%--
--21%/100%--
--22%/100%--
--23%/100%--
--24%/100%--
--25%/100%--
--26%/100%--
--27%/100%--
--28%/100%--
--29%/100%--
--30%/100%--
--31%/100%--
--32%/100%--
--33%/100%--
--34%/100%--
--35%/100%--
--36%/100%--
--37%/100%--
--38%/100%--
--39%/100%--
--40%/100%--
--41%/100%--
--42%/100%--
--43%/100%--
--44%/100%--
--45%/100%--
--46%/100%--
--47%/100%--
--48%/100%--
--49%/100%--
--50%/100%--
--51%/100%--
--52%/100%--
--53%/100%--
--54%/100%--
--55%/100%--
--56%/100%--
--57%/100%--
--58%/100%--
--59%/100%--
--60%/100%--
--61%/100%--
--62%/100%--
--63%/100%--
--64%/100%--
--65%/100%--
--66%/100%--
--67%/100%--
--68%/100%--
--69%/100%--
--70%/100%--
--71%/100%--
--72%/100%--
--73%/100%--
--74%/100%--
--75%/100%--
--76%/100%--
--77%/100%--
--78%/10

Search for the best number of hidden units.

In [52]:
# Train one encoder-decoder model per candidate hidden size and keep the
# validation accuracy of each, then report the best candidate.
n_a_v = [64, 128]  # candidate numbers of LSTM hidden units
models = []        # one (hidden units, validation accuracy) tuple per candidate

for n_a in n_a_v:
  # training
  # ----------------------------------------------------------------------------
  # The layers are created once per candidate and shared by the training graph
  # and the inference graph below, so inference reuses the trained weights.
  LSTM_cell_encoder = tf.keras.layers.LSTM(n_a, return_state=True)
  LSTM_cell_decoder = tf.keras.layers.LSTM(n_a, return_state=True)
  densor = tf.keras.layers.Dense(len(CHARS_Y), activation=tf.nn.softmax)
  # Reshape sizes follow the alphabets instead of hard-coded 12 / 11.
  reshapor_x = tf.keras.layers.Reshape((1, len(CHARS_X)))
  reshapor_y = tf.keras.layers.Reshape((1, len(CHARS_Y)))

  # encoder
  X = tf.keras.layers.Input(shape=(X_ROWS, len(CHARS_X)))
  a0 = tf.keras.layers.Input(shape=(n_a,))
  c0 = tf.keras.layers.Input(shape=(n_a,))

  a = a0
  c = c0

  for t in range(X_ROWS):
    # `t=t` binds the current step to the lambda. A plain closure looks `t` up
    # each time the function runs, which yields the wrong slice whenever the
    # layer is invoked again after the loop has moved on.
    x = tf.keras.layers.Lambda(lambda x, t=t: x[:, t, :])(X)
    x = reshapor_x(x) # tensor needs to be of shape (batch, time step, state)
    # return_state=True yields (output, hidden state, cell state); the output is
    # carried forward as the next hidden-state input.
    a, _, c = LSTM_cell_encoder(x, initial_state=[a, c])

  # decoder (teacher forcing: step t is fed the true character of step t - 1)
  y_init = tf.keras.layers.Input(shape=(1, len(CHARS_Y)))
  Y = tf.keras.layers.Input(shape=(Y_ROWS, len(CHARS_Y)))

  outputs = []

  for t in range(Y_ROWS):
    if t == 0:
      y = tf.keras.layers.Lambda(lambda y: y[:, 0, :])(y_init)
    else:
      y = tf.keras.layers.Lambda(lambda y, t=t: y[:, t - 1, :])(Y)
    y = reshapor_y(y)

    # The decoder starts from the encoder's final states.
    a, _, c = LSTM_cell_decoder(y, initial_state=[a, c])

    out = densor(a)
    outputs.append(out)

  model = tf.keras.Model(inputs=[X, a0, c0, y_init, Y], outputs=outputs)

  opt = tf.keras.optimizers.Adam()
  model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

  # Zero initial states and a zero start token, one row per training sample.
  # Separate names keep the symbolic inputs above from being overwritten.
  m = X_train_encoded.shape[0]
  a0_train = np.zeros((m, n_a))
  c0_train = np.zeros((m, n_a))
  y_init_train = np.zeros((m, 1, len(CHARS_Y)))

  model.fit([X_train_encoded, a0_train, c0_train, y_init_train, y_train_encoded],
            list(y_train_encoded_time_step), epochs=20)

  # predicting
  # ----------------------------------------------------------------------------
  #encoder
  X_ = tf.keras.layers.Input(shape=(X_ROWS, len(CHARS_X)))
  a0_ = tf.keras.layers.Input(shape=(n_a,))
  c0_ = tf.keras.layers.Input(shape=(n_a,))

  a_ = a0_
  c_ = c0_

  for t in range(X_ROWS):
    x_ = tf.keras.layers.Lambda(lambda x, t=t: x[:, t, :])(X_)
    x_ = reshapor_x(x_) # tensor needs to be of shape (batch, time step, state)
    a_, _, c_ = LSTM_cell_encoder(x_, initial_state=[a_, c_])

  # decoder (each step is fed the previous step's own softmax output)
  xx_ = tf.keras.layers.Input(shape=(1, len(CHARS_Y)))
  inp = xx_

  outputs_ = []

  for t in range(Y_ROWS):
    y_ = tf.keras.layers.Lambda(lambda y: y[:, 0, :])(inp)

    y_ = reshapor_y(y_)

    a_, _, c_ = LSTM_cell_decoder(y_, initial_state=[a_, c_])

    out_ = densor(a_)

    inp = reshapor_y(out_)
    outputs_.append(inp)

  model_inf_ = tf.keras.Model(inputs=[X_, a0_, c0_, xx_], outputs=outputs_)

  # Predict the whole validation set in one call instead of one call per sample.
  n_val = X_val_encoded.shape[0]
  a0_val = np.zeros((n_val, n_a))
  c0_val = np.zeros((n_val, n_a))
  xx_val = np.zeros((n_val, 1, len(CHARS_Y)))

  step_probs = model_inf_.predict([X_val_encoded, a0_val, c0_val, xx_val])
  if not isinstance(step_probs, list):
    step_probs = [step_probs]  # a single-output model returns a bare array

  # step_probs[t][i] is the distribution of sample i at output step t;
  # decode_y_str expects the steps of one sample.
  y_pred = [decode_y_str([step[i] for step in step_probs]) for i in range(n_val)]

  acc = accuracy_score(y_val, y_pred)
  models.append((n_a, acc))

models.sort(key=lambda k: k[1], reverse=True)
print('Best model hyperparameters:\n Accuracy: {0}\n Hidden units: {1}'.format(models[0][1], models[0][0]))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Best model hyperparameters:
 Accuracy: 0.9753328967474351
 Hidden units: 128


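I used two LSTMs. The first LSTM is the encoder: it reads the one-hot encoded input expression one character at a time and learns a good representation of it. The second LSTM is the decoder: it starts from the encoder's final states and outputs the sum of the two numbers from the input, one character at a time. During training the decoder is fed the previous true output character; during prediction it is fed its own previous output.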

Training

In [0]:
N_A = 128 # hidden units for the best model
# These layers are shared with the inference graph built in the "Predicting"
# section, which is how the trained weights are reused there.
LSTM_cell_encoder = tf.keras.layers.LSTM(N_A, return_state=True)
LSTM_cell_decoder = tf.keras.layers.LSTM(N_A, return_state=True)
densor = tf.keras.layers.Dense(len(CHARS_Y), activation=tf.nn.softmax)
# Reshape sizes follow the alphabets instead of hard-coded 12 / 11.
reshapor_x = tf.keras.layers.Reshape((1, len(CHARS_X)))
reshapor_y = tf.keras.layers.Reshape((1, len(CHARS_Y)))

#encoder
X = tf.keras.layers.Input(shape=(X_ROWS, len(CHARS_X)))
a0 = tf.keras.layers.Input(shape=(N_A,))
c0 = tf.keras.layers.Input(shape=(N_A,))

a = a0
c = c0

for t in range(X_ROWS):
  # `t=t` binds the current step to the lambda. A plain closure looks `t` up
  # each time the function runs, which yields the wrong slice whenever the
  # layer is invoked again after the loop has moved on.
  x = tf.keras.layers.Lambda(lambda x, t=t: x[:, t, :])(X)
  x = reshapor_x(x) # tensor needs to be of shape (batch, time step, state)
  # return_state=True yields (output, hidden state, cell state); the output is
  # carried forward as the next hidden-state input.
  a, _, c = LSTM_cell_encoder(x, initial_state=[a, c])

# decoder (teacher forcing: step t is fed the true character of step t - 1)
y_init = tf.keras.layers.Input(shape=(1, len(CHARS_Y)))
Y = tf.keras.layers.Input(shape=(Y_ROWS, len(CHARS_Y)))

outputs = []

for t in range(Y_ROWS):
  if t == 0:
    y = tf.keras.layers.Lambda(lambda y: y[:, 0, :])(y_init)
  else:
    y = tf.keras.layers.Lambda(lambda y, t=t: y[:, t - 1, :])(Y)
  y = reshapor_y(y)

  # The decoder starts from the encoder's final states.
  a, _, c = LSTM_cell_decoder(y, initial_state=[a, c])

  out = densor(a)
  outputs.append(out)

# One softmax output per character position of the sum.
model = tf.keras.Model(inputs=[X, a0, c0, y_init, Y], outputs=outputs)

In [0]:
# Compile with Adam (default settings) and categorical cross-entropy; the loss
# string is given once and the model has one output per character position.
opt = tf.keras.optimizers.Adam()
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

# Concrete arrays fed to the model's state / start-token inputs: all zeros, one
# row per training sample.
# NOTE(review): a0, c0 and y_init previously named the symbolic Input tensors of
# the model; from here on they name numpy arrays. The model itself is
# unaffected, but re-running the model-building cell resets them.
m = X_train_encoded.shape[0]
a0 = np.zeros((m, N_A))
c0 = np.zeros((m, N_A))
y_init = np.zeros((m, 1, len(CHARS_Y)))

In [55]:
# Print the layer-by-layer summary of the training model.
print('The model architecture:')
model.summary()

The model architecture:
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_136 (InputLayer)          (None, 7, 12)        0                                            
__________________________________________________________________________________________________
lambda_303 (Lambda)             (None, 12)           0           input_136[0][0]                  
__________________________________________________________________________________________________
lambda_302 (Lambda)             (None, 12)           0           input_136[0][0]                  
__________________________________________________________________________________________________
lambda_301 (Lambda)             (None, 12)           0           input_136[0][0]                  
_____________________________________________________________________________________

In [56]:
# Inputs: encoded expressions, zero initial states, zero start token and the
# true encoded sums (decoder input). Targets: one array per output time step.
model.fit([X_train_encoded, a0, c0, y_init, y_train_encoded], list(y_train_encoded_time_step), epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f3e08f78ef0>

Predicting

In [0]:
# Inference graph: rebuilt from the already-trained shared layers
# (LSTM_cell_encoder, LSTM_cell_decoder, densor, reshapor_x, reshapor_y), but
# the decoder consumes its own previous output instead of the true sum.
#encoder
X_ = tf.keras.layers.Input(shape=(X_ROWS, len(CHARS_X)))
a0_ = tf.keras.layers.Input(shape=(N_A,))
c0_ = tf.keras.layers.Input(shape=(N_A,))

a_ = a0_
c_ = c0_

for t in range(X_ROWS):
  # `t=t` binds the current step to the lambda. A plain closure looks `t` up
  # each time the function runs, which yields the wrong slice whenever the
  # layer is invoked again after the loop has moved on.
  x_ = tf.keras.layers.Lambda(lambda x, t=t: x[:, t, :])(X_)
  x_ = reshapor_x(x_) # tensor needs to be of shape (batch, time step, state)
  a_, _, c_ = LSTM_cell_encoder(x_, initial_state=[a_, c_])

# decoder
xx_ = tf.keras.layers.Input(shape=(1, len(CHARS_Y)))  # start token
inp = xx_

outputs_ = []

for t in range(Y_ROWS):
  y_ = tf.keras.layers.Lambda(lambda y: y[:, 0, :])(inp)

  y_ = reshapor_y(y_)

  a_, _, c_ = LSTM_cell_decoder(y_, initial_state=[a_, c_])

  out_ = densor(a_)

  # The reshaped softmax output is both this step's model output and the next
  # step's decoder input.
  inp = reshapor_y(out_)
  outputs_.append(inp)

model_inf_ = tf.keras.Model(inputs=[X_, a0_, c0_, xx_], outputs=outputs_)

In [58]:
# Evaluate on the test set with a single batched predict call instead of one
# call per sample. The arrays get their own names so the training-time
# a0 / c0 / y_init arrays stay intact and the fit cell can be re-run.
n_test = X_test_encoded.shape[0]
a0_test = np.zeros((n_test, N_A))
c0_test = np.zeros((n_test, N_A))
xx_test = np.zeros((n_test, 1, len(CHARS_Y)))  # zero start token

step_probs = model_inf_.predict([X_test_encoded, a0_test, c0_test, xx_test])
if not isinstance(step_probs, list):
  step_probs = [step_probs]  # a single-output model returns a bare array

# step_probs[t][i] is the distribution of sample i at output step t;
# decode_y_str expects the steps of one sample.
y_pred = [decode_y_str([step[i] for step in step_probs]) for i in range(n_test)]

# Exact-match accuracy on the decoded sum strings.
acc = accuracy_score(y_test, y_pred)
print('The accuracy on the test set: {0}'.format(acc))

The accuracy on the test set: 0.9742414320017463


In [59]:
# Show ten randomly chosen test examples (indices drawn with replacement)
# together with the true and the predicted sum.
nrs = np.random.randint(0, len(y_test), size=10)
report = 'Input: {0}\n   True sum: {1}\n   Predicted sum: {2}'
for nr in nrs:
  print(report.format(X_test[nr], y_test[nr], y_pred[nr]))

Input: 73+152
   True sum: 225
   Predicted sum: 225
Input: 211+135
   True sum: 346
   Predicted sum: 346
Input: 247+238
   True sum: 485
   Predicted sum: 485
Input: 48+19
   True sum: 67
   Predicted sum: 67
Input: 181+185
   True sum: 366
   Predicted sum: 366
Input: 69+223
   True sum: 292
   Predicted sum: 292
Input: 3+136
   True sum: 139
   Predicted sum: 139
Input: 194+116
   True sum: 310
   Predicted sum: 310
Input: 61+8
   True sum: 69
   Predicted sum: 69
Input: 18+158
   True sum: 176
   Predicted sum: 176
