An implementation of sequence to sequence learning for performing addition with noisy inputs

Input: "5a35+6b1"
Output: "596"
Padding is handled by using a repeated sentinel character (space)

In [1]:
from __future__ import print_function
import numpy as np
from six.moves import range
import sys

In [2]:
import keras as K
import tensorflow as tf
from keras.models import Sequential
from keras import layers

# Session configuration passed to TensorFlow: the device_count map names 1 GPU
# and 8 CPU devices.
# NOTE(review): tf.ConfigProto / tf.Session look like the TF 1.x API -- confirm
# the installed TensorFlow version before re-running this cell.
config = tf.ConfigProto(device_count = {'GPU': 1 , 'CPU': 8} ) 
# Presumably makes GPU memory grow on demand instead of being reserved up
# front -- verify against the TensorFlow GPUOptions documentation.
config.gpu_options.allow_growth = True

# Create the session from this config and register it with the Keras backend.
sess = tf.Session(config=config) 
K.backend.set_session(sess)

Using TensorFlow backend.


In [3]:
class CharacterTable(object):
    """Two-way lookup between characters and their one-hot rows.

    Given a set of characters it can:
    + Encode a string into a one-hot matrix (one row per character)
    + Decode one-hot rows or rows of probabilities back into a string
    + Decode a vector of integer indices back into a string
    """
    def __init__(self, chars):
        """Build the character <-> index lookup tables.

        # Arguments
            chars: Characters that can appear in the input. Duplicates are
                dropped and the remaining characters are kept in sorted order,
                so the index of a character is its rank in that order.
        """
        self.chars = sorted(set(chars))
        self.char_indices = {}
        self.indices_char = {}
        for position, symbol in enumerate(self.chars):
            self.char_indices[symbol] = position
            self.indices_char[position] = symbol

    def encode(self, C, num_rows):
        """Return the one-hot encoding of string C.

        # Arguments
            C: string, to be encoded. Every character must be in the table.
            num_rows: Number of rows in the returned array. Rows beyond
                len(C) are left as all zeros, which keeps every encoded
                sample the same height.

        # Returns
            A float array of shape (num_rows, len(self.chars)).
        """
        encoded = np.zeros((num_rows, len(self.chars)))
        for row, symbol in enumerate(C):
            encoded[row, self.char_indices[symbol]] = 1
        return encoded

    def decode(self, x, calc_argmax=True):
        """Turn encoded rows (or raw indices) back into a string.

        # Arguments
            x: A 2D array of probabilities or one-hot rows; or a vector of
                character indices (used with `calc_argmax=False`).
            calc_argmax: Whether to take the argmax over the last axis to
                obtain character indices first, defaults to `True`.

        # Returns
            The decoded string, one character per index.
        """
        indices = x.argmax(axis=-1) if calc_argmax else x
        return ''.join(self.indices_char[index] for index in indices)

In [4]:
# Characters of a clean problem: the ten digits, the plus sign, and the space
# that is used for padding.
validchars = '0123456789+ '
# Lowercase letters, used only as noise characters.
noisychars = 'abcdefghijklmnopqrstuvwxyz'
# Full vocabulary seen by the model: 12 valid + 26 noise = 38 characters.
chars = validchars + noisychars
ctable = CharacterTable(chars)

In [5]:
# Demo: one-hot encode a 6-character string into 7 rows; the extra last row
# is never written to and therefore stays all zeros.
ctable.encode(C='0 +wsk', num_rows=7)

array([[0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 1., 0., 0.,

In [6]:
# Round trip: encode into 9 rows, then decode. The three unused all-zero rows
# have argmax 0, which is the space character (it sorts first in the table),
# so the decoded string comes back space-padded.
x = ctable.encode(C='0 +wsk', num_rows=9)
ctable.decode(x)

'0 +wsk   '

In [7]:
class colors:
    """Escape sequences that `train` wraps around its right/wrong marks."""
    # Printed after a mark to end the colouring started by `ok` / `fail`.
    close = '\033[0m'
    # Printed before the mark of a wrong guess.
    fail = '\033[91m'
    # Printed before the mark of a correct guess.
    ok = '\033[92m'

In [8]:
def insert_noise(string, noisychars):
    """Return `string` with one random noise character inserted.

    # Arguments
        string: Text to corrupt; must be non-empty, since the insertion
            position is drawn from range(len(string)).
        noisychars: Characters to draw the noise character from.

    # Returns
        The text with exactly one extra character. The noise is placed in
        front of one of the existing characters, so it is never appended
        after the last one.
    """
    text = str(string)
    # Draw the position first and the noise character second.
    position = np.random.randint(0, len(string), 1)[0]
    noise = np.random.choice(list(noisychars))
    return text[0:position] + noise + text[position:]

# Demo: the result is '129' with one random letter inserted somewhere before
# its last digit (the output changes from run to run).
insert_noise('129', noisychars)

'z129'

In [9]:
def generate_training(noisychars, TRAINING_SIZE = 50000, DIGITS = 3, REVERSE = True):
    """Generate unique noisy addition problems together with their answers.

    Each operand is a random non-negative integer built from 1 to DIGITS
    digits, and each operand gets one noise character inserted by
    `insert_noise`. The answer is the sum of the clean operands.

    # Arguments
        noisychars: Characters that `insert_noise` may inject.
        TRAINING_SIZE: Number of (query, answer) pairs to produce.
        DIGITS: Maximum number of digits per operand.
        REVERSE: If true, every padded query string is reversed.

    # Returns
        A string array of shape (TRAINING_SIZE, 2): column 0 holds the
        queries padded with spaces to 2 * DIGITS + 3 characters, column 1 the
        answers padded with spaces to DIGITS + 1 characters.
    """
    # Longest possible query: two operands of DIGITS digits, the '+' sign and
    # one inserted noise character per operand (e.g. '3a45+67b8').
    MAXLEN = DIGITS + 1 + DIGITS + 2
    digit_pool = list('0123456789')

    def random_operand():
        # The digit count is drawn before the digits themselves.
        n_digits = np.random.randint(1, DIGITS + 1)
        return int(''.join(np.random.choice(digit_pool) for _ in range(n_digits)))

    questions, answers = [], []
    seen = set()
    print('Generating data...')
    while len(questions) < TRAINING_SIZE:
        a = random_operand()
        b = random_operand()
        # Corrupt both operands with one noise character each.
        a_noisy = insert_noise(str(a), noisychars)
        b_noisy = insert_noise(str(b), noisychars)

        # Sorting makes the key order-independent, so 'x+y' and 'y+x' count
        # as the same question and only the first one seen is kept.
        key = tuple(sorted((a_noisy, b_noisy)))
        if key in seen:
            continue
        seen.add(key)

        # Right-pad with spaces so every query is MAXLEN characters long.
        query = '{}+{}'.format(a_noisy, b_noisy).ljust(MAXLEN)
        # The answer is computed from the clean operands and is at most
        # DIGITS + 1 characters long.
        ans = str(a + b).ljust(DIGITS + 1)
        if REVERSE:
            # e.g. '12+345  ' becomes '  543+21' (padding ends up in front).
            query = query[::-1]
        questions.append(query)
        answers.append(ans)
    print('Total addition questions:', len(questions))
    stacked = np.vstack((np.array(questions), np.array(answers)))
    return stacked.T

In [10]:
def vectorization(train, DIGITS):
    """One-hot encode the generated problems and split off a validation set.

    Uses the module-level `ctable` for the encoding and `chars` for the
    vocabulary size.

    # Arguments
        train: String array of shape (n_samples, 2) as returned by
            `generate_training`; column 0 holds queries, column 1 answers.
        DIGITS: Maximum number of digits per operand used to generate `train`.

    # Returns
        The tuple (x_train, y_train, x_val, y_val) of boolean arrays. Queries
        have shape (n, 2 * DIGITS + 3, len(chars)) and answers
        (n, DIGITS + 1, len(chars)). The last 10% of the shuffled samples
        form the validation set.
    """
    # Query length: two operands, '+', and one inserted noise char per operand.
    MAXLEN = 2*DIGITS+1+2
    print('Vectorization...')
    n_samples = train.shape[0]
    # Use the builtin `bool`: the `np.bool` alias was deprecated in NumPy 1.20
    # and removed in 1.24, where it raises AttributeError.
    x = np.zeros((n_samples, MAXLEN, len(chars)), dtype=bool)
    y = np.zeros((n_samples, DIGITS + 1, len(chars)), dtype=bool)
    # encode questions
    for i, sentence in enumerate(train[:, 0]):
        x[i] = ctable.encode(sentence, MAXLEN)
    # encode answers
    for i, sentence in enumerate(train[:, 1]):
        y[i] = ctable.encode(sentence, DIGITS + 1)

    # Shuffle x and y with the same permutation so pairs stay aligned.
    indices = np.arange(len(y))
    np.random.shuffle(indices)
    x = x[indices]
    y = y[indices]

    # Explicitly set apart 10% for validation data that we never train over.
    split_at = len(x) - len(x) // 10
    (x_train, x_val) = x[:split_at], x[split_at:]
    (y_train, y_val) = y[:split_at], y[split_at:]

    print('Shapes in training Data:')
    print(x_train.shape)
    print(y_train.shape)

    print('Shapes in validation Data:')
    print(x_val.shape)
    print(y_val.shape)
    return x_train, y_train, x_val, y_val

In [11]:
def build_model(chars, rnn_type='gru', DIGITS=3, HIDDEN_SIZE=10, BATCH_SIZE=100, DECODER_LAYERS=1):
    """Build and compile the encoder-decoder RNN for noisy addition.

    Architecture: encoder RNN -> RepeatVector(DIGITS + 1) -> DECODER_LAYERS
    decoder RNNs -> per-time-step softmax over the vocabulary.

    # Arguments
        chars: Vocabulary; only its length is used (input/output feature size).
        rnn_type: 'gru', 'lstm' or 'rnn' (case-insensitive).
        DIGITS: Maximum number of digits per operand; fixes the input length
            (2 * DIGITS + 3) and the output length (DIGITS + 1).
        HIDDEN_SIZE: Number of units in every recurrent layer.
        BATCH_SIZE: Accepted for backward compatibility; not used when
            building the model.
        DECODER_LAYERS: Number of stacked decoder RNN layers.

    # Returns
        The compiled Keras `Sequential` model (its summary is printed).

    # Raises
        ValueError: If `rnn_type` is not one of the supported names.
    """
    # Query length: two operands, '+', and one inserted noise char per operand.
    MAXLEN = 2*DIGITS + 1 + 2
    rnn_layer_names = {'gru': 'GRU', 'lstm': 'LSTM', 'rnn': 'SimpleRNN'}
    rnn_key = rnn_type.lower()
    if rnn_key not in rnn_layer_names:
        # Raise instead of sys.exit(0): exiting with status 0 reports success
        # and would shut down the interpreter on a simple argument error.
        raise ValueError(
            '{rnn_type} RNN type not covered; expected one of: {known}'.format(
                rnn_type=rnn_type, known=', '.join(sorted(rnn_layer_names))))
    RNN = getattr(layers, rnn_layer_names[rnn_key])

    print('Build model...')
    model = Sequential()
    # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE.
    # Note: In a situation where your input sequences have a variable length,
    # use input_shape=(None, num_feature).
    model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
    # As the decoder RNN's input, repeatedly provide with the last output of
    # RNN for each time step. Repeat 'DIGITS + 1' times as that's the maximum
    # length of output, e.g., when DIGITS=3, max output is 999+999=1998.
    model.add(layers.RepeatVector(DIGITS + 1))
    # The decoder RNN could be multiple layers stacked or a single layer.
    for _ in range(DECODER_LAYERS):
        # By setting return_sequences to True, return not only the last output but
        # all the outputs so far in the form of (num_samples, timesteps,
        # output_dim). This is necessary as TimeDistributed in the below expects
        # the first dimension to be the timesteps.
        model.add(RNN(HIDDEN_SIZE, return_sequences=True))

    # Apply a dense layer to every temporal slice of the input. For each step
    # of the output sequence, decide which character should be chosen.
    model.add(layers.TimeDistributed(layers.Dense(len(chars), activation='softmax')))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()
    return model

In [12]:
def train(model, x_train, y_train, x_val, y_val,  n_epochs=20, BATCH_SIZE=128, REVERSE=False):
    """Train `model` one epoch at a time, printing sample predictions.

    After every epoch, 5 random validation samples are decoded with the
    module-level `ctable` and printed next to the model's guess, marked with
    a coloured check or cross.

    # Arguments
        model: Compiled Keras model, e.g. from `build_model`.
        x_train, y_train: One-hot encoded training queries and answers.
        x_val, y_val: One-hot encoded validation queries and answers.
        n_epochs: Number of single-epoch `fit` calls to run.
        BATCH_SIZE: Batch size passed to `model.fit`.
        REVERSE: Set to the value used when generating the data; if true the
            decoded query is flipped back before it is printed.
    """
    # Train the model each generation and show predictions against the validation
    # dataset.
    for iteration in range(1, n_epochs+1):
        print()
        print('-' * 50)
        print('Iteration', iteration)
        model.fit(x_train, y_train,
                  batch_size=BATCH_SIZE,
                  epochs=1,
                  validation_data=(x_val, y_val))
        # Select 5 samples from the validation set at random so we can visualize
        # errors.
        for i in range(5):
            ind = np.random.randint(0, len(x_val))
            # Index with a length-1 array to keep the leading batch dimension.
            rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
            # `predict_classes` no longer exists in recent Keras/TensorFlow;
            # the argmax over the softmax axis yields the same class indices.
            preds = np.argmax(model.predict(rowx, verbose=0), axis=-1)
            q = ctable.decode(rowx[0])
            correct = ctable.decode(rowy[0])
            guess = ctable.decode(preds[0], calc_argmax=False)
            print('Question: ', q[::-1] if REVERSE else q, end=' ')
            print('Answer: ', correct, end=' ')
            if correct == guess:
                print(colors.ok + '☑' + colors.close, end=' ')
            else:
                print(colors.fail + '☒' + colors.close, end=' ')
            print('Guess: ', guess, end='\n')

In [13]:
# 50,000 unique problems with up to 3 digits per operand; queries are kept in
# reading order (REVERSE=False).
train3char = generate_training(noisychars, TRAINING_SIZE=50000, DIGITS=3, REVERSE=False)

Generating data...
Total addition questions: 50000


In [14]:
# One row per problem; column 0 is the query, column 1 the answer.
train3char.shape

(50000, 2)

In [15]:
# Peek at four randomly chosen (query, answer) rows.
r = np.random.randint(low=0, high=train3char.shape[0], size=4)
train3char[r, :]

array([['d0+6w2   ', '62  '],
       ['a3+2m5   ', '28  '],
       ['5d6+65v4 ', '710 '],
       ['34h4+v23 ', '367 ']], dtype='<U9')

In [16]:
# One-hot encode the 3-digit problems and hold out 10% for validation.
x_train_3char, y_train_3char, x_val_3char, y_val_3char = vectorization(train=train3char, DIGITS=3)

Vectorization...
Shapes in training Data:
(45000, 9, 38)
(45000, 4, 38)
Shapes in validation Data:
(5000, 9, 38)
(5000, 4, 38)


In [17]:
# LSTM encoder plus a single LSTM decoder layer, 128 units each.
# BATCH_SIZE is accepted by build_model but not used by it.
model3char = build_model(chars=chars, rnn_type='lstm', DIGITS=3, HIDDEN_SIZE=128, BATCH_SIZE=128, DECODER_LAYERS=1)

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               85504     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 38)             4902      
Total params: 221,990
Trainable params: 221,990
Non-trainable params: 0
_________________________________________________________________


In [18]:
# 30 single-epoch rounds; REVERSE=False matches how train3char was generated,
# so the sample questions are printed in reading order.
train(model3char, x_train_3char, y_train_3char, x_val_3char, y_val_3char,  n_epochs=30, 
      BATCH_SIZE=128, REVERSE=False)


--------------------------------------------------
Iteration 1
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Question:  k1+7r6    Answer:  77   [91m☒[0m Guess:  14  
Question:  9f5+c3    Answer:  98   [91m☒[0m Guess:  14  
Question:  c5+4g17   Answer:  422  [91m☒[0m Guess:  10  
Question:  c4+y29    Answer:  33   [91m☒[0m Guess:  14  
Question:  9r07+w8   Answer:  915  [91m☒[0m Guess:  105 

--------------------------------------------------
Iteration 2
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Question:  d4+8o14   Answer:  818  [91m☒[0m Guess:  10  
Question:  j22+45z5  Answer:  477  [91m☒[0m Guess:  221 
Question:  3c66+y4   Answer:  370  [91m☒[0m Guess:  14  
Question:  66w7+9y2  Answer:  759  [91m☒[0m Guess:  104 
Question:  f94+75s0  Answer:  844  [91m☒[0m Guess:  100 

--------------------------------------------------
Iteration 3
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Question:  z43+o5    Answer:  48   [91m☒

Question:  a0+80x1   Answer:  801  [91m☒[0m Guess:  800 
Question:  z3+24h9   Answer:  252  [92m☑[0m Guess:  252 
Question:  b8+s5     Answer:  13   [92m☑[0m Guess:  13  
Question:  c55+j34   Answer:  89   [92m☑[0m Guess:  89  
Question:  2x64+y9   Answer:  273  [92m☑[0m Guess:  273 

--------------------------------------------------
Iteration 17
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Question:  y8+3k9    Answer:  47   [92m☑[0m Guess:  47  
Question:  h255+x4   Answer:  259  [92m☑[0m Guess:  259 
Question:  a2+1x1    Answer:  13   [92m☑[0m Guess:  13  
Question:  n976+4n73 Answer:  1449 [91m☒[0m Guess:  1459
Question:  k0+25a3   Answer:  253  [92m☑[0m Guess:  253 

--------------------------------------------------
Iteration 18
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Question:  30u6+91z1 Answer:  1217 [91m☒[0m Guess:  1118
Question:  55m6+1i5  Answer:  571  [92m☑[0m Guess:  571 
Question:  7v6+s3    Answer:  79   [92m☑[0

Check Five digits reversed:
+ One layer LSTM (128 HN), 550k training examples = 99% train/test accuracy in 30 epochs

In [19]:
# Generate data.
# NOTE(review): despite the `_reversed` suffix in the variable names, this cell
# passes REVERSE=False to both generate_training and train, so the queries are
# NOT reversed here -- confirm whether this is the intended baseline.
train5char_reversed = generate_training(noisychars, TRAINING_SIZE=550000, DIGITS=5, REVERSE=False)
x_train_5char_reversed, y_train_5char_reversed, x_val_5char_reversed, y_val_5char_reversed = \
    vectorization(train=train5char_reversed, DIGITS=5)

# build model: we use GRU, since it's simpler and provides similar (if not better) performance
# (BATCH_SIZE is accepted by build_model but not used by it.)
model5char_reversed = build_model(chars=chars, rnn_type='gru', DIGITS=5, HIDDEN_SIZE=128, 
                                  BATCH_SIZE=200, DECODER_LAYERS=1)
# train model
train(model5char_reversed, x_train_5char_reversed, y_train_5char_reversed, x_val_5char_reversed, 
      y_val_5char_reversed,  n_epochs=30, BATCH_SIZE=200, REVERSE=False)

Generating data...
Total addition questions: 550000
Vectorization...
Shapes in training Data:
(495000, 13, 38)
(495000, 6, 38)
Shapes in validation Data:
(55000, 13, 38)
(55000, 6, 38)
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_1 (GRU)                  (None, 128)               64128     
_________________________________________________________________
repeat_vector_2 (RepeatVecto (None, 6, 128)            0         
_________________________________________________________________
gru_2 (GRU)                  (None, 6, 128)            98688     
_________________________________________________________________
time_distributed_2 (TimeDist (None, 6, 38)             4902      
Total params: 167,718
Trainable params: 167,718
Non-trainable params: 0
_________________________________________________________________

--------------------------------------------------
Iteration 1
Trai

Question:  t2+y729       Answer:  731    [92m☑[0m Guess:  731   
Question:  a5+18r8       Answer:  193    [92m☑[0m Guess:  193   
Question:  y75+286e97    Answer:  28772  [92m☑[0m Guess:  28772 
Question:  1f857+z148    Answer:  2005   [91m☒[0m Guess:  1904  
Question:  7h9+218x9     Answer:  2268   [91m☒[0m Guess:  2288  

--------------------------------------------------
Iteration 14
Train on 495000 samples, validate on 55000 samples
Epoch 1/1
Question:  500m5+n71197  Answer:  76202  [91m☒[0m Guess:  77101 
Question:  9y4+a1        Answer:  95     [92m☑[0m Guess:  95    
Question:  k5+996w41     Answer:  99646  [91m☒[0m Guess:  99656 
Question:  2c12+t1       Answer:  213    [92m☑[0m Guess:  213   
Question:  38c35+r995    Answer:  4830   [92m☑[0m Guess:  4830  

--------------------------------------------------
Iteration 15
Train on 495000 samples, validate on 55000 samples
Epoch 1/1
Question:  6n5+49b180    Answer:  49245  [91m☒[0m Guess:  49235 
Question:  

Question:  k7+g74        Answer:  81     [92m☑[0m Guess:  81    
Question:  3y40+642g58   Answer:  64598  [91m☒[0m Guess:  64698 
Question:  o634+572w4    Answer:  6358   [92m☑[0m Guess:  6358  
Question:  b520+t62533   Answer:  63053  [92m☑[0m Guess:  63053 
Question:  p185+h5       Answer:  190    [92m☑[0m Guess:  190   

--------------------------------------------------
Iteration 28
Train on 495000 samples, validate on 55000 samples
Epoch 1/1
Question:  v4+21o4       Answer:  218    [92m☑[0m Guess:  218   
Question:  8674q5+88y95  Answer:  95640  [91m☒[0m Guess:  95540 
Question:  j2860+1u299   Answer:  4159   [92m☑[0m Guess:  4159  
Question:  94n9+1x00     Answer:  1049   [92m☑[0m Guess:  1049  
Question:  w775+v2778    Answer:  3553   [92m☑[0m Guess:  3553  

--------------------------------------------------
Iteration 29
Train on 495000 samples, validate on 55000 samples
Epoch 1/1
Question:  a8+512j26     Answer:  51234  [92m☑[0m Guess:  51234 
Question:  

In [20]:
# Generate data: 5-digit operands with the query strings reversed.
train5char_reversed = generate_training(noisychars, TRAINING_SIZE=550000, DIGITS=5, REVERSE=True)
x_train_5char_reversed, y_train_5char_reversed, x_val_5char_reversed, y_val_5char_reversed = \
    vectorization(train=train5char_reversed, DIGITS=5)

# build model: we use GRU, since it's simpler and provides similar (if not better) performance
# Single decoder layer.
model5char_reversed = build_model(chars=chars, rnn_type='gru', DIGITS=5, HIDDEN_SIZE=128, 
                                  BATCH_SIZE=200, DECODER_LAYERS=1)
# train model (REVERSE=True flips the printed sample questions back to reading order)
train(model5char_reversed, x_train_5char_reversed, y_train_5char_reversed, x_val_5char_reversed, 
      y_val_5char_reversed,  n_epochs=30, BATCH_SIZE=200, REVERSE=True)

Generating data...
Total addition questions: 550000
Vectorization...
Shapes in training Data:
(495000, 13, 38)
(495000, 6, 38)
Shapes in validation Data:
(55000, 13, 38)
(55000, 6, 38)
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_3 (GRU)                  (None, 128)               64128     
_________________________________________________________________
repeat_vector_3 (RepeatVecto (None, 6, 128)            0         
_________________________________________________________________
gru_4 (GRU)                  (None, 6, 128)            98688     
_________________________________________________________________
time_distributed_3 (TimeDist (None, 6, 38)             4902      
Total params: 167,718
Trainable params: 167,718
Non-trainable params: 0
_________________________________________________________________

--------------------------------------------------
Iteration 1
Trai

Question:  192m6+690w04  Answer:  70930  [91m☒[0m Guess:  70927 
Question:  e4+1h50       Answer:  154    [92m☑[0m Guess:  154   
Question:  2f091+9820b0  Answer:  100291 [92m☑[0m Guess:  100291
Question:  w4+370r37     Answer:  37041  [92m☑[0m Guess:  37041 
Question:  l66+f44       Answer:  110    [92m☑[0m Guess:  110   

--------------------------------------------------
Iteration 14
Train on 495000 samples, validate on 55000 samples
Epoch 1/1
Question:  q3699+40l2    Answer:  4101   [92m☑[0m Guess:  4101  
Question:  a7+m36        Answer:  43     [92m☑[0m Guess:  43    
Question:  30x6+l1029    Answer:  1335   [92m☑[0m Guess:  1335  
Question:  c6+9z20       Answer:  926    [92m☑[0m Guess:  926   
Question:  9n2+o6        Answer:  98     [92m☑[0m Guess:  98    

--------------------------------------------------
Iteration 15
Train on 495000 samples, validate on 55000 samples
Epoch 1/1
Question:  k7+k89253     Answer:  89260  [92m☑[0m Guess:  89260 
Question:  

Question:  o2+10o2       Answer:  104    [92m☑[0m Guess:  104   
Question:  q1+f1         Answer:  2      [92m☑[0m Guess:  2     
Question:  d4+b45        Answer:  49     [92m☑[0m Guess:  49    
Question:  e5+k1         Answer:  6      [92m☑[0m Guess:  6     
Question:  8q10+s8       Answer:  818    [92m☑[0m Guess:  818   

--------------------------------------------------
Iteration 28
Train on 495000 samples, validate on 55000 samples
Epoch 1/1
Question:  336u6+3j404   Answer:  6770   [92m☑[0m Guess:  6770  
Question:  6i7+4b9       Answer:  116    [92m☑[0m Guess:  116   
Question:  5i85+5h673    Answer:  6258   [92m☑[0m Guess:  6258  
Question:  912k90+4r49   Answer:  91739  [92m☑[0m Guess:  91739 
Question:  s511+g9       Answer:  520    [92m☑[0m Guess:  520   

--------------------------------------------------
Iteration 29
Train on 495000 samples, validate on 55000 samples
Epoch 1/1
Question:  14k88+o0      Answer:  1488   [92m☑[0m Guess:  1488  
Question:  

In [21]:
# Generate data: 5-digit operands with the query strings reversed.
train5char_reversed = generate_training(noisychars, TRAINING_SIZE=550000, DIGITS=5, REVERSE=True)
x_train_5char_reversed, y_train_5char_reversed, x_val_5char_reversed, y_val_5char_reversed = \
    vectorization(train=train5char_reversed, DIGITS=5)

# build model: we use GRU, since it's simpler and provides similar (if not better) performance
# Two stacked decoder layers this time.
model5char_reversed = build_model(chars=chars, rnn_type='gru', DIGITS=5, HIDDEN_SIZE=128, 
                                  BATCH_SIZE=200, DECODER_LAYERS=2)
# train model (REVERSE=True flips the printed sample questions back to reading order)
train(model5char_reversed, x_train_5char_reversed, y_train_5char_reversed, x_val_5char_reversed, 
      y_val_5char_reversed,  n_epochs=30, BATCH_SIZE=200, REVERSE=True)

Generating data...
Total addition questions: 550000
Vectorization...
Shapes in training Data:
(495000, 13, 38)
(495000, 6, 38)
Shapes in validation Data:
(55000, 13, 38)
(55000, 6, 38)
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_5 (GRU)                  (None, 128)               64128     
_________________________________________________________________
repeat_vector_4 (RepeatVecto (None, 6, 128)            0         
_________________________________________________________________
gru_6 (GRU)                  (None, 6, 128)            98688     
_________________________________________________________________
gru_7 (GRU)                  (None, 6, 128)            98688     
_________________________________________________________________
time_distributed_4 (TimeDist (None, 6, 38)             4902      
Total params: 266,406
Trainable params: 266,406
Non-trainable params: 0
__

Question:  4s2+r875      Answer:  917    [92m☑[0m Guess:  917   
Question:  8h5+m71       Answer:  156    [92m☑[0m Guess:  156   
Question:  8z5+x2016     Answer:  2101   [92m☑[0m Guess:  2101  
Question:  x33201+56b0   Answer:  33761  [92m☑[0m Guess:  33761 
Question:  56t1+h0       Answer:  561    [92m☑[0m Guess:  561   

--------------------------------------------------
Iteration 14
Train on 495000 samples, validate on 55000 samples
Epoch 1/1
Question:  f62+22r0      Answer:  282    [92m☑[0m Guess:  282   
Question:  1b106+s3      Answer:  1109   [92m☑[0m Guess:  1109  
Question:  f36+e4        Answer:  40     [92m☑[0m Guess:  40    
Question:  8v0+w6        Answer:  86     [92m☑[0m Guess:  86    
Question:  z34263+n6     Answer:  34269  [92m☑[0m Guess:  34269 

--------------------------------------------------
Iteration 15
Train on 495000 samples, validate on 55000 samples
Epoch 1/1
Question:  7p3+h0        Answer:  73     [92m☑[0m Guess:  73    
Question:  

Question:  11f9+x7759    Answer:  7878   [92m☑[0m Guess:  7878  
Question:  b6+x2197      Answer:  2203   [92m☑[0m Guess:  2203  
Question:  43f04+s5      Answer:  4309   [92m☑[0m Guess:  4309  
Question:  423d75+5u1703 Answer:  94078  [92m☑[0m Guess:  94078 
Question:  952p95+c37    Answer:  95332  [92m☑[0m Guess:  95332 

--------------------------------------------------
Iteration 28
Train on 495000 samples, validate on 55000 samples
Epoch 1/1
Question:  r47+53o261    Answer:  53308  [92m☑[0m Guess:  53308 
Question:  504c3+1q5636  Answer:  20679  [92m☑[0m Guess:  20679 
Question:  3t27+51o78    Answer:  5505   [92m☑[0m Guess:  5505  
Question:  807n7+g0      Answer:  8077   [92m☑[0m Guess:  8077  
Question:  9l2+j1        Answer:  93     [92m☑[0m Guess:  93    

--------------------------------------------------
Iteration 29
Train on 495000 samples, validate on 55000 samples
Epoch 1/1
Question:  r7+2r5347     Answer:  25354  [92m☑[0m Guess:  25354 
Question:  

In [22]:
# Generate data: 5-digit operands with the query strings reversed.
train5char_reversed = generate_training(noisychars, TRAINING_SIZE=550000, DIGITS=5, REVERSE=True)
x_train_5char_reversed, y_train_5char_reversed, x_val_5char_reversed, y_val_5char_reversed = \
    vectorization(train=train5char_reversed, DIGITS=5)

# build model: we use GRU, since it's simpler and provides similar (if not better) performance
# Three stacked decoder layers this time.
model5char_reversed = build_model(chars=chars, rnn_type='gru', DIGITS=5, HIDDEN_SIZE=128, 
                                  BATCH_SIZE=200, DECODER_LAYERS=3)
# train model (REVERSE=True flips the printed sample questions back to reading order)
train(model5char_reversed, x_train_5char_reversed, y_train_5char_reversed, x_val_5char_reversed, 
      y_val_5char_reversed,  n_epochs=30, BATCH_SIZE=200, REVERSE=True)

Generating data...
Total addition questions: 550000
Vectorization...
Shapes in training Data:
(495000, 13, 38)
(495000, 6, 38)
Shapes in validation Data:
(55000, 13, 38)
(55000, 6, 38)
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_8 (GRU)                  (None, 128)               64128     
_________________________________________________________________
repeat_vector_5 (RepeatVecto (None, 6, 128)            0         
_________________________________________________________________
gru_9 (GRU)                  (None, 6, 128)            98688     
_________________________________________________________________
gru_10 (GRU)                 (None, 6, 128)            98688     
_________________________________________________________________
gru_11 (GRU)                 (None, 6, 128)            98688     
_________________________________________________________________
time_dis

Question:  41n3+j4       Answer:  417    [92m☑[0m Guess:  417   
Question:  z1+g9         Answer:  10     [92m☑[0m Guess:  10    
Question:  7c70+80b75    Answer:  8845   [92m☑[0m Guess:  8845  
Question:  4k9+m9        Answer:  58     [92m☑[0m Guess:  58    
Question:  x48543+7o235  Answer:  55778  [92m☑[0m Guess:  55778 

--------------------------------------------------
Iteration 14
Train on 495000 samples, validate on 55000 samples
Epoch 1/1
Question:  m9342+l4      Answer:  9346   [92m☑[0m Guess:  9346  
Question:  y25+z787      Answer:  812    [92m☑[0m Guess:  812   
Question:  700g3+5b1     Answer:  7054   [92m☑[0m Guess:  7054  
Question:  7i550+4m6     Answer:  7596   [92m☑[0m Guess:  7596  
Question:  87b3+3b0      Answer:  903    [92m☑[0m Guess:  903   

--------------------------------------------------
Iteration 15
Train on 495000 samples, validate on 55000 samples
Epoch 1/1
Question:  c5+78n5       Answer:  790    [92m☑[0m Guess:  790   
Question:  

Question:  w8+14p4       Answer:  152    [92m☑[0m Guess:  152   
Question:  32v4+c5       Answer:  329    [92m☑[0m Guess:  329   
Question:  96y18+v721    Answer:  10339  [92m☑[0m Guess:  10339 
Question:  5f32+n6189    Answer:  6721   [92m☑[0m Guess:  6721  
Question:  z7+3n5        Answer:  42     [92m☑[0m Guess:  42    

--------------------------------------------------
Iteration 28
Train on 495000 samples, validate on 55000 samples
Epoch 1/1
Question:  1u0345+v8     Answer:  10353  [92m☑[0m Guess:  10353 
Question:  s2+k3417      Answer:  3419   [92m☑[0m Guess:  3419  
Question:  8x9+1g66      Answer:  255    [92m☑[0m Guess:  255   
Question:  r3+91d42      Answer:  9145   [92m☑[0m Guess:  9145  
Question:  h3+8806z5     Answer:  88068  [92m☑[0m Guess:  88068 

--------------------------------------------------
Iteration 29
Train on 495000 samples, validate on 55000 samples
Epoch 1/1
Question:  q7+q820       Answer:  827    [92m☑[0m Guess:  827   
Question:  

## Even harder: instead of injecting noise, replace valid characters with noisy ones

Please note that this task has no exact solution: the digit hidden behind each replaced character could be any of 10 possibilities! Yet, the network performs remarkably well =)

In [23]:
def replace_noise(string, noisychars):
    """Return `string` with one random character overwritten by noise.

    # Arguments
        string: Text to corrupt; must be non-empty, since the position is
            drawn from range(len(string)).
        noisychars: Characters to draw the replacement from.

    # Returns
        Text of the same length as the input in which the character at one
        random position has been replaced by a noise character.
    """
    text = str(string)
    # Draw the position first and the noise character second.
    position = np.random.randint(0, len(string), 1)[0]
    replacement = np.random.choice(list(noisychars))
    head, tail = text[:position], text[position + 1:]
    return head + replacement + tail

# Compare the two corruptions: insertion makes the string one character
# longer, replacement keeps its length and overwrites one digit.
print(insert_noise('129', noisychars))
print(replace_noise('129', noisychars))

1o29
12j


In [24]:
def generate_training(noisychars, TRAINING_SIZE = 50000, DIGITS = 3, REVERSE = True):
    """Generate unique addition problems with replacement noise.

    Each operand is a random non-negative integer built from 1 to DIGITS
    digits. The first operand has one character overwritten by
    `replace_noise`; the second has one character added by `insert_noise`.
    The answer is the sum of the clean operands.

    # Arguments
        noisychars: Characters that may be used as noise.
        TRAINING_SIZE: Number of (query, answer) pairs to produce.
        DIGITS: Maximum number of digits per operand.
        REVERSE: If true, every padded query string is reversed.

    # Returns
        A string array of shape (TRAINING_SIZE, 2): column 0 holds the
        queries padded with spaces to 2 * DIGITS + 2 characters, column 1 the
        answers padded with spaces to DIGITS + 1 characters.
    """
    # Longest possible query: two operands of DIGITS digits, the '+' sign and
    # the single character that insert_noise adds to the second operand.
    MAXLEN = DIGITS + 1 + DIGITS +1
    answer_width = DIGITS + 1
    digit_pool = list('0123456789')

    def draw_operand():
        # The digit count is drawn before the digits themselves.
        length = np.random.randint(1, DIGITS + 1)
        return int(''.join(np.random.choice(digit_pool) for _ in range(length)))

    questions = []
    answers = []
    seen = set()
    print('Generating data...')
    while len(questions) < TRAINING_SIZE:
        a = draw_operand()
        b = draw_operand()
        # NOTE(review): only the first operand uses replace_noise; the second
        # still uses insert_noise. MAXLEN above is sized for exactly this mix,
        # so it looks deliberate -- confirm.
        a_noisy = replace_noise(str(a), noisychars)
        b_noisy = insert_noise(str(b), noisychars)

        # Sorting makes the key order-independent, so 'x+y' and 'y+x' count
        # as the same question and only the first one seen is kept.
        key = tuple(sorted((a_noisy, b_noisy)))
        if key in seen:
            continue
        seen.add(key)

        # Right-pad with spaces so every query is MAXLEN characters long.
        query = '{}+{}'.format(a_noisy, b_noisy).ljust(MAXLEN)
        # The answer is computed from the clean operands.
        ans = str(a + b).ljust(answer_width)
        if REVERSE:
            # e.g. '12+345  ' becomes '  543+21' (padding ends up in front).
            query = query[::-1]
        questions.append(query)
        answers.append(ans)
    print('Total addition questions:', len(questions))
    stacked = np.vstack((np.array(questions), np.array(answers)))
    return stacked.T

In [25]:
def vectorization(train, DIGITS):
    """One-hot encode the generated problems and split off a validation set.

    Uses the module-level `ctable` for the encoding and `chars` for the
    vocabulary size.

    # Arguments
        train: String array of shape (n_samples, 2) as returned by
            `generate_training`; column 0 holds queries, column 1 answers.
        DIGITS: Maximum number of digits per operand used to generate `train`.

    # Returns
        The tuple (x_train, y_train, x_val, y_val) of boolean arrays. Queries
        have shape (n, 2 * DIGITS + 2, len(chars)) and answers
        (n, DIGITS + 1, len(chars)). The last 10% of the shuffled samples
        form the validation set.
    """
    # Query length: two operands, '+', and one extra character of noise.
    MAXLEN = 2*DIGITS+1+1
    print('Vectorization...')
    n_samples = train.shape[0]
    # Use the builtin `bool`: the `np.bool` alias was deprecated in NumPy 1.20
    # and removed in 1.24, where it raises AttributeError.
    x = np.zeros((n_samples, MAXLEN, len(chars)), dtype=bool)
    y = np.zeros((n_samples, DIGITS + 1, len(chars)), dtype=bool)
    # encode questions
    for i, sentence in enumerate(train[:, 0]):
        x[i] = ctable.encode(sentence, MAXLEN)
    # encode answers
    for i, sentence in enumerate(train[:, 1]):
        y[i] = ctable.encode(sentence, DIGITS + 1)

    # Shuffle x and y with the same permutation so pairs stay aligned.
    indices = np.arange(len(y))
    np.random.shuffle(indices)
    x = x[indices]
    y = y[indices]

    # Explicitly set apart 10% for validation data that we never train over.
    split_at = len(x) - len(x) // 10
    (x_train, x_val) = x[:split_at], x[split_at:]
    (y_train, y_val) = y[:split_at], y[split_at:]

    print('Shapes in training Data:')
    print(x_train.shape)
    print(y_train.shape)

    print('Shapes in validation Data:')
    print(x_val.shape)
    print(y_val.shape)
    return x_train, y_train, x_val, y_val

In [26]:
def build_model(chars, rnn_type='gru', DIGITS=3, HIDDEN_SIZE=10, BATCH_SIZE=100, DECODER_LAYERS=1):
    """Build and compile an encoder/decoder RNN for character-level addition.

    # Arguments
        chars: Collection of characters; its length sets both the input
            feature size and the number of output classes.
        rnn_type: One of 'gru', 'lstm' or 'rnn' (case-insensitive); selects
            the recurrent layer used for both the encoder and the decoder.
        DIGITS: Number of digits used to size the model. The input has
            `2 * DIGITS + 2` time steps and the output `DIGITS + 1`.
        HIDDEN_SIZE: Number of units in every recurrent layer.
        BATCH_SIZE: Accepted for interface compatibility; not used when
            building the model.
        DECODER_LAYERS: Number of stacked recurrent layers in the decoder.

    # Returns
        The compiled `Sequential` model (categorical cross-entropy loss,
        Adam optimizer, accuracy metric). Its summary is printed as a side
        effect.

    # Raises
        ValueError: If `rnn_type` is not one of the supported names.
    """
    MAXLEN = 2 * DIGITS + 1 + 1
    kind = rnn_type.lower()
    if kind == 'gru':
        RNN = layers.GRU
    elif kind == 'lstm':
        RNN = layers.LSTM
    elif kind == 'rnn':
        RNN = layers.SimpleRNN
    else:
        # Raise instead of print + sys.exit(0): exiting with status 0 reports
        # success on an error path and tears down the interpreter from inside
        # a helper function.
        raise ValueError('{rnn_type} RNN type not covered'.format(rnn_type=rnn_type))

    print('Build model...')
    model = Sequential()
    # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE.
    # Note: In a situation where your input sequences have a variable length,
    # use input_shape=(None, num_feature).
    model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
    # As the decoder RNN's input, repeatedly provide the last output of the
    # encoder for each time step. Repeat 'DIGITS + 1' times as that's the
    # maximum length of output, e.g., when DIGITS=3, max output is 999+999=1998.
    model.add(layers.RepeatVector(DIGITS + 1))
    # The decoder RNN could be multiple layers stacked or a single layer.
    for _ in range(DECODER_LAYERS):
        # By setting return_sequences to True, return not only the last output
        # but all the outputs so far in the form of (num_samples, timesteps,
        # output_dim). This is necessary as TimeDistributed below expects the
        # first dimension to be the timesteps.
        model.add(RNN(HIDDEN_SIZE, return_sequences=True))

    # Apply a dense layer to every temporal slice of the input. For each step
    # of the output sequence, decide which character should be chosen.
    model.add(layers.TimeDistributed(layers.Dense(len(chars), activation='softmax')))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()
    return model

In [27]:
# Get the data: generate the question/answer pairs, then one-hot encode and split them.
train5char_reversed = generate_training(
    noisychars, TRAINING_SIZE=550000, DIGITS=5, REVERSE=True)
(x_train_5char_reversed, y_train_5char_reversed,
 x_val_5char_reversed, y_val_5char_reversed) = vectorization(
    train=train5char_reversed, DIGITS=5)

# Build the model: we use GRU, since it's simpler and provides similar
# (if not better) performance.
model5char_reversed = build_model(
    chars=chars, rnn_type='gru', DIGITS=5, HIDDEN_SIZE=128,
    BATCH_SIZE=200, DECODER_LAYERS=1)

# Train the model.
train(
    model5char_reversed,
    x_train_5char_reversed, y_train_5char_reversed,
    x_val_5char_reversed, y_val_5char_reversed,
    n_epochs=30, BATCH_SIZE=200, REVERSE=True)

Generating data...
Total addition questions: 550000
Vectorization...
Shapes in training Data:
(495000, 12, 38)
(495000, 6, 38)
Shapes in validation Data:
(55000, 12, 38)
(55000, 6, 38)
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_12 (GRU)                 (None, 128)               64128     
_________________________________________________________________
repeat_vector_6 (RepeatVecto (None, 6, 128)            0         
_________________________________________________________________
gru_13 (GRU)                 (None, 6, 128)            98688     
_________________________________________________________________
time_distributed_6 (TimeDist (None, 6, 38)             4902      
Total params: 167,718
Trainable params: 167,718
Non-trainable params: 0
_________________________________________________________________

--------------------------------------------------
Iteration 1
Trai

Question:  1i+9s49      Answer:  962    [91m☒[0m Guess:  966   
Question:  f445+1r58    Answer:  6603   [91m☒[0m Guess:  3603  
Question:  1f046+c7     Answer:  12053  [91m☒[0m Guess:  10053 
Question:  b+6k73       Answer:  677    [91m☒[0m Guess:  679   
Question:  f777+e7431   Answer:  12208  [91m☒[0m Guess:  11109 

--------------------------------------------------
Iteration 14
Train on 495000 samples, validate on 55000 samples
Epoch 1/1
Question:  n+a51        Answer:  52     [91m☒[0m Guess:  56    
Question:  l+p83152     Answer:  83160  [91m☒[0m Guess:  83166 
Question:  850w+q5651   Answer:  14155  [91m☒[0m Guess:  14157 
Question:  6v76+w680    Answer:  7356   [91m☒[0m Guess:  7956  
Question:  7108q+6m162  Answer:  77248  [91m☒[0m Guess:  77257 

--------------------------------------------------
Iteration 15
Train on 495000 samples, validate on 55000 samples
Epoch 1/1
Question:  e+70k9       Answer:  713    [91m☒[0m Guess:  718   
Question:  v93+8z4    

Question:  26l2+9v83    Answer:  3595   [91m☒[0m Guess:  3625  
Question:  17j+w5       Answer:  175    [91m☒[0m Guess:  178   
Question:  s62+708n77   Answer:  71439  [92m☑[0m Guess:  71439 
Question:  94q+s24292   Answer:  25236  [91m☒[0m Guess:  25235 
Question:  8n+8p4       Answer:  170    [91m☒[0m Guess:  168   

--------------------------------------------------
Iteration 28
Train on 495000 samples, validate on 55000 samples
Epoch 1/1
Question:  8683z+g7     Answer:  86844  [92m☑[0m Guess:  86844 
Question:  11k5+1943c9  Answer:  20604  [91m☒[0m Guess:  20594 
Question:  2640o+82p5   Answer:  27228  [91m☒[0m Guess:  27234 
Question:  m+u3622      Answer:  3622   [91m☒[0m Guess:  3624  
Question:  h+n139       Answer:  145    [91m☒[0m Guess:  144   

--------------------------------------------------
Iteration 29
Train on 495000 samples, validate on 55000 samples
Epoch 1/1
Question:  3h+1e311     Answer:  1349   [91m☒[0m Guess:  1348  
Question:  b+775x9    