In [1]:
# -*- coding: utf-8 -*-
'''An implementation of sequence to sequence learning for performing addition

Input: "535+61"
Output: "596"
Padding is handled by using a repeated sentinel character (space)

Input may optionally be inverted, shown to increase performance in many tasks in:
"Learning to Execute"
http://arxiv.org/abs/1410.4615
and
"Sequence to Sequence Learning with Neural Networks"
http://papers.nips.cc/paper/5346-sequence-to-sequence-learning-with-neural-networks.pdf
Theoretically it introduces shorter term dependencies between source and target.

Two digits inverted:
+ One layer LSTM (128 HN), 5k training examples = 99% train/test accuracy in 55 epochs

Three digits inverted:
+ One layer LSTM (128 HN), 50k training examples = 99% train/test accuracy in 100 epochs

Four digits inverted:
+ One layer LSTM (128 HN), 400k training examples = 99% train/test accuracy in 20 epochs

Five digits inverted:
+ One layer LSTM (128 HN), 550k training examples = 99% train/test accuracy in 30 epochs
'''

from __future__ import print_function
from keras.models import Sequential
from keras import layers
import numpy as np
from six.moves import range

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
class CharacterTable(object):
    """Two-way mapping between a fixed alphabet and one-hot rows.

    Supports three operations:
    + Encoding a string as a one-hot matrix (one row per character)
    + Decoding a one-hot matrix back to its string
    + Decoding a sequence of character indices (or per-character scores)
      back to its string
    """
    def __init__(self, chars):
        """Build the lookup tables for the given alphabet.

        # Arguments
            chars: Characters that can appear in the input. Duplicates are
                dropped and the rest are sorted, so a character's index is
                its rank in sorted order.
        """
        alphabet = sorted(set(chars))
        self.chars = alphabet
        self.char_indices = {ch: idx for idx, ch in enumerate(alphabet)}
        self.indices_char = {idx: ch for idx, ch in enumerate(alphabet)}

    def encode(self, C, num_rows):
        """One hot encode the string C.

        # Arguments
            C: String to encode; every character must be in the table.
            num_rows: Number of rows in the returned one hot encoding. This
                keeps the number of rows the same for every sample; rows past
                the end of C are left as all zeros.

        # Returns
            A float array of shape (num_rows, alphabet size) with a single 1
            in row i at the column of C[i].

        # Raises
            KeyError: If C contains a character that is not in the table.
            IndexError: If C has more than num_rows characters.
        """
        one_hot = np.zeros((num_rows, len(self.chars)))
        columns = (self.char_indices[ch] for ch in C)
        for row, column in enumerate(columns):
            one_hot[row, column] = 1
        return one_hot

    def decode(self, x, calc_argmax=True):
        """Turn an encoded sequence back into its string.

        # Arguments
            x: With calc_argmax=True, an array whose last axis holds one
                score per character (e.g. a one hot matrix or predicted
                probabilities). With calc_argmax=False, an iterable of
                character indices.
            calc_argmax: Whether to reduce x with argmax over its last axis
                before looking the indices up.

        # Returns
            The decoded string, one character per index.
        """
        indices = x.argmax(axis=-1) if calc_argmax else x
        return ''.join(self.indices_char[index] for index in indices)

In [3]:
class colors:
    """ANSI escape sequences used to color terminal output."""
    # ok / fail start a bright green / bright red span; close resets styling.
    ok, fail, close = '\033[92m', '\033[91m', '\033[0m'

In [4]:
# Parameters for the model and dataset.
# TRAINING_SIZE: number of unique addition questions to generate (the
# validation split is later carved out of this total).
TRAINING_SIZE = 50000
# DIGITS: maximum number of digits in each operand.
DIGITS = 3
# INVERT: when True, each padded query string is reversed before encoding.
INVERT = True

# Maximum length of input is 'int + int' (e.g., '345+678'). Maximum length of
# int is DIGITS.
MAXLEN = DIGITS + 1 + DIGITS

# All the digits, the plus sign, and a space used for padding.
chars = '0123456789+ '
ctable = CharacterTable(chars)

In [5]:
questions = []
expected = []
seen = set()
print('Generating data...')

# Operands are integers in [0, 10**DIGITS) and a pair is only counted once
# regardless of order, so this is how many distinct questions exist. Asking
# for more would make the rejection loop below spin forever, so fail fast.
num_operands = 10 ** DIGITS
max_unique = num_operands * (num_operands + 1) // 2
if TRAINING_SIZE > max_unique:
    raise ValueError(
        'TRAINING_SIZE=%d exceeds the %d unique questions available for '
        'DIGITS=%d' % (TRAINING_SIZE, max_unique, DIGITS))

DIGIT_CHARS = list('0123456789')


def random_operand():
    """Return a random non-negative integer with at most DIGITS digits.

    The number of digits is drawn uniformly from 1..DIGITS and each digit is
    drawn uniformly from 0-9, so leading zeros are possible and shorter
    numbers are over-represented compared with sampling the integer directly.
    """
    num_digits = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(DIGIT_CHARS)
                       for _ in range(num_digits)))


while len(questions) < TRAINING_SIZE:
    a, b = random_operand(), random_operand()
    # Skip any addition questions we've already seen.
    # Also skip any such that x+y == y+x (hence the sorting).
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    # Pad the data with spaces such that it is always MAXLEN.
    q = '{}+{}'.format(a, b)
    query = q.ljust(MAXLEN)
    # Answers can be of maximum size DIGITS + 1.
    ans = str(a + b).ljust(DIGITS + 1)
    if INVERT:
        # Reverse the query, e.g., '12+345 ' becomes ' 543+21'. (Note the
        # space used for padding.)
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total addition questions:', len(questions))

Generating data...
Total addition questions: 50000


In [6]:
# Print example data.
# Questions are stored reversed only when INVERT is set, so undo the reversal
# under that same condition to show them in natural reading order.
for i in range(min(10, len(questions))):
    question = questions[i][::-1] if INVERT else questions[i]
    print('%s = %s' % (question, expected[i]))

82+4    = 86  
22+8    = 30  
3+43    = 46  
18+8    = 26  
85+7    = 92  
828+36  = 864 
48+5    = 53  
32+3    = 35  
195+3   = 198 
2+35    = 37  


In [7]:
print('Vectorization...')
# One hot tensors of shape (num_samples, sequence_length, alphabet_size).
# The builtin `bool` is used as the dtype because the `np.bool` alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


In [8]:
# Shuffle x and y with one shared permutation so that every question stays
# aligned with its answer.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

In [9]:
# Hold out the last 10% of the shuffled samples as validation data that is
# never trained on.
num_val = len(x) // 10
split_at = len(x) - num_val
x_train, x_val = x[:split_at], x[split_at:]
y_train, y_val = y[:split_at], y[split_at:]

In [10]:
# Show the shapes of the training inputs and targets, one per line.
print('Training Data:', x_train.shape, y_train.shape, sep='\n')

Training Data:
(45000, 7, 12)
(45000, 4, 12)


In [11]:
# Show the shapes of the validation inputs and targets, one per line.
print('Validation Data:', x_val.shape, y_val.shape, sep='\n')

Validation Data:
(5000, 7, 12)
(5000, 4, 12)


In [12]:
# Recurrent layer class used for both the encoder and the decoder. Try
# replacing layers.LSTM with layers.GRU or layers.SimpleRNN.
RNN = layers.LSTM
# Number of units in each recurrent layer.
HIDDEN_SIZE = 128
# Number of samples per batch passed to model.fit.
BATCH_SIZE = 128
# Number of stacked recurrent layers in the decoder.
LAYERS = 1

In [13]:
print('Build model...')
model = Sequential()
# "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE.
# Note: if your input sequences have a variable length, use
# input_shape=(None, num_features) instead.
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
# Feed the decoder RNN the encoder's final output at every time step. It is
# repeated 'DIGITS + 1' times because that is the maximum length of the
# output, e.g., when DIGITS=3, the largest sum is 999+999=1998.
model.add(layers.RepeatVector(DIGITS + 1))
# The decoder RNN could be multiple layers stacked or a single layer.
for _ in range(LAYERS):
    # Setting return_sequences=True makes the layer return its output at every
    # time step, shaped (num_samples, timesteps, output_dim), rather than only
    # the last one. The TimeDistributed layer added afterwards needs this
    # because it treats the first dimension after the batch as the timesteps.
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))

Build model...


In [14]:
# Apply a dense layer to every temporal slice of the input. For each step of
# the output sequence, decide which character should be chosen.
model.add(layers.TimeDistributed(layers.Dense(len(chars))))
# Softmax turns each step's scores into a probability distribution over the
# characters, matching the categorical cross-entropy loss used below.
model.add(layers.Activation('softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               72192     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 12)             1548      
_________________________________________________________________
activation_1 (Activation)    (None, 4, 12)             0         
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


In [18]:
# Upper bound for the training loop's iteration counter; also embedded in the
# filename of the saved model.
no_iter = 200

In [19]:
# Train the model one epoch at a time and, after each epoch, show predictions
# against the validation dataset.
# The range ends at no_iter + 1 so that exactly no_iter epochs are run;
# range(1, no_iter) would stop one epoch short of the count that the saved
# model's filename advertises.
for iteration in range(1, no_iter + 1):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    # Select 10 samples from the validation set at random so we can visualize
    # errors.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Index with a one-element array to keep the leading batch dimension.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # Sequential.predict_classes is not available in newer Keras releases;
        # the argmax over the softmax output yields the same class indices.
        preds = model.predict(rowx, verbose=0).argmax(axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        # Questions were reversed for training when INVERT is set; undo that
        # for display.
        print('Q', q[::-1] if INVERT else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 1
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 896+208 T 1104 [91m☒[0m 1100
Q 305+61  T 366  [91m☒[0m 367 
Q 8+607   T 615  [91m☒[0m 613 
Q 785+71  T 856  [91m☒[0m 850 
Q 64+881  T 945  [91m☒[0m 941 
Q 411+3   T 414  [91m☒[0m 415 
Q 12+216  T 228  [91m☒[0m 222 
Q 297+319 T 616  [92m☑[0m 616 
Q 881+864 T 1745 [91m☒[0m 1749
Q 10+257  T 267  [91m☒[0m 260 

--------------------------------------------------
Iteration 2
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 832+371 T 1203 [91m☒[0m 1200
Q 603+0   T 603  [91m☒[0m 605 
Q 902+2   T 904  [91m☒[0m 903 
Q 211+48  T 259  [91m☒[0m 261 
Q 627+52  T 679  [91m☒[0m 683 
Q 8+188   T 196  [91m☒[0m 195 
Q 14+90   T 104  [91m☒[0m 105 
Q 89+1    T 90   [91m☒[0m 80  
Q 71+44   T 115  [92m☑[0m 115 
Q 870+98  T 968  [91m☒[0m 966 

--------------------------------------------------
Iteration 3
Train on 45000 samples, valida

Q 91+198  T 289  [92m☑[0m 289 
Q 556+46  T 602  [92m☑[0m 602 
Q 108+458 T 566  [92m☑[0m 566 
Q 757+691 T 1448 [92m☑[0m 1448
Q 18+57   T 75   [92m☑[0m 75  
Q 81+374  T 455  [92m☑[0m 455 
Q 3+717   T 720  [92m☑[0m 720 
Q 88+7    T 95   [92m☑[0m 95  
Q 78+737  T 815  [92m☑[0m 815 
Q 487+757 T 1244 [92m☑[0m 1244

--------------------------------------------------
Iteration 16
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 307+1   T 308  [92m☑[0m 308 
Q 482+245 T 727  [92m☑[0m 727 
Q 182+268 T 450  [92m☑[0m 450 
Q 803+91  T 894  [92m☑[0m 894 
Q 761+591 T 1352 [92m☑[0m 1352
Q 662+452 T 1114 [92m☑[0m 1114
Q 0+398   T 398  [91m☒[0m 397 
Q 30+3    T 33   [91m☒[0m 34  
Q 207+517 T 724  [92m☑[0m 724 
Q 923+828 T 1751 [92m☑[0m 1751

--------------------------------------------------
Iteration 17
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 47+241  T 288  [92m☑[0m 288 
Q 740+77  T 817  [92m☑[0m 817 
Q 51+878  T 929  [92m☑[0

Q 739+24  T 763  [92m☑[0m 763 
Q 8+39    T 47   [92m☑[0m 47  
Q 804+497 T 1301 [92m☑[0m 1301
Q 544+754 T 1298 [92m☑[0m 1298
Q 82+839  T 921  [92m☑[0m 921 
Q 632+838 T 1470 [92m☑[0m 1470
Q 281+987 T 1268 [92m☑[0m 1268
Q 220+3   T 223  [92m☑[0m 223 
Q 23+221  T 244  [92m☑[0m 244 
Q 275+643 T 918  [92m☑[0m 918 

--------------------------------------------------
Iteration 30
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 653+57  T 710  [92m☑[0m 710 
Q 412+211 T 623  [92m☑[0m 623 
Q 27+19   T 46   [92m☑[0m 46  
Q 924+4   T 928  [92m☑[0m 928 
Q 8+114   T 122  [92m☑[0m 122 
Q 232+794 T 1026 [92m☑[0m 1026
Q 545+7   T 552  [92m☑[0m 552 
Q 67+376  T 443  [92m☑[0m 443 
Q 46+438  T 484  [92m☑[0m 484 
Q 77+150  T 227  [92m☑[0m 227 

--------------------------------------------------
Iteration 31
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 976+471 T 1447 [92m☑[0m 1447
Q 67+939  T 1006 [92m☑[0m 1006
Q 256+45  T 301  [92m☑[0

Q 49+857  T 906  [92m☑[0m 906 
Q 498+821 T 1319 [92m☑[0m 1319
Q 488+261 T 749  [92m☑[0m 749 
Q 373+391 T 764  [92m☑[0m 764 
Q 11+39   T 50   [92m☑[0m 50  
Q 98+133  T 231  [92m☑[0m 231 
Q 3+245   T 248  [92m☑[0m 248 
Q 22+45   T 67   [92m☑[0m 67  
Q 664+387 T 1051 [92m☑[0m 1051
Q 379+20  T 399  [92m☑[0m 399 

--------------------------------------------------
Iteration 44
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 4+584   T 588  [92m☑[0m 588 
Q 519+67  T 586  [92m☑[0m 586 
Q 444+3   T 447  [92m☑[0m 447 
Q 234+23  T 257  [92m☑[0m 257 
Q 916+6   T 922  [92m☑[0m 922 
Q 71+631  T 702  [92m☑[0m 702 
Q 587+1   T 588  [92m☑[0m 588 
Q 584+551 T 1135 [92m☑[0m 1135
Q 41+14   T 55   [92m☑[0m 55  
Q 105+712 T 817  [92m☑[0m 817 

--------------------------------------------------
Iteration 45
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 458+498 T 956  [92m☑[0m 956 
Q 559+41  T 600  [92m☑[0m 600 
Q 874+145 T 1019 [92m☑[0

Q 29+621  T 650  [92m☑[0m 650 
Q 820+994 T 1814 [92m☑[0m 1814
Q 81+630  T 711  [92m☑[0m 711 
Q 551+74  T 625  [92m☑[0m 625 
Q 21+914  T 935  [92m☑[0m 935 
Q 1+954   T 955  [92m☑[0m 955 
Q 249+24  T 273  [92m☑[0m 273 
Q 35+75   T 110  [92m☑[0m 110 
Q 411+3   T 414  [92m☑[0m 414 
Q 305+74  T 379  [92m☑[0m 379 

--------------------------------------------------
Iteration 58
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 495+868 T 1363 [92m☑[0m 1363
Q 6+710   T 716  [92m☑[0m 716 
Q 55+908  T 963  [92m☑[0m 963 
Q 711+13  T 724  [92m☑[0m 724 
Q 41+91   T 132  [92m☑[0m 132 
Q 4+592   T 596  [92m☑[0m 596 
Q 899+605 T 1504 [92m☑[0m 1504
Q 331+100 T 431  [92m☑[0m 431 
Q 78+27   T 105  [92m☑[0m 105 
Q 51+53   T 104  [92m☑[0m 104 

--------------------------------------------------
Iteration 59
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 2+610   T 612  [92m☑[0m 612 
Q 838+51  T 889  [92m☑[0m 889 
Q 7+629   T 636  [92m☑[0

Q 133+43  T 176  [92m☑[0m 176 
Q 81+90   T 171  [92m☑[0m 171 
Q 70+870  T 940  [92m☑[0m 940 
Q 228+49  T 277  [92m☑[0m 277 
Q 471+983 T 1454 [92m☑[0m 1454
Q 874+1   T 875  [92m☑[0m 875 
Q 479+231 T 710  [92m☑[0m 710 
Q 199+107 T 306  [92m☑[0m 306 
Q 39+951  T 990  [92m☑[0m 990 
Q 640+68  T 708  [92m☑[0m 708 

--------------------------------------------------
Iteration 72
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 358+92  T 450  [92m☑[0m 450 
Q 610+59  T 669  [92m☑[0m 669 
Q 859+99  T 958  [92m☑[0m 958 
Q 31+38   T 69   [92m☑[0m 69  
Q 96+234  T 330  [92m☑[0m 330 
Q 541+492 T 1033 [92m☑[0m 1033
Q 156+97  T 253  [92m☑[0m 253 
Q 650+841 T 1491 [92m☑[0m 1491
Q 849+528 T 1377 [92m☑[0m 1377
Q 6+594   T 600  [92m☑[0m 600 

--------------------------------------------------
Iteration 73
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 655+8   T 663  [92m☑[0m 663 
Q 128+36  T 164  [92m☑[0m 164 
Q 482+993 T 1475 [92m☑[0

Q 4+122   T 126  [92m☑[0m 126 
Q 68+169  T 237  [92m☑[0m 237 
Q 902+695 T 1597 [92m☑[0m 1597
Q 94+744  T 838  [92m☑[0m 838 
Q 48+945  T 993  [92m☑[0m 993 
Q 223+89  T 312  [92m☑[0m 312 
Q 109+52  T 161  [92m☑[0m 161 
Q 1+645   T 646  [92m☑[0m 646 
Q 209+314 T 523  [92m☑[0m 523 
Q 799+8   T 807  [92m☑[0m 807 

--------------------------------------------------
Iteration 86
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 589+4   T 593  [92m☑[0m 593 
Q 8+968   T 976  [92m☑[0m 976 
Q 926+12  T 938  [92m☑[0m 938 
Q 3+898   T 901  [92m☑[0m 901 
Q 12+605  T 617  [92m☑[0m 617 
Q 709+28  T 737  [92m☑[0m 737 
Q 89+1    T 90   [92m☑[0m 90  
Q 789+821 T 1610 [92m☑[0m 1610
Q 332+574 T 906  [92m☑[0m 906 
Q 456+514 T 970  [92m☑[0m 970 

--------------------------------------------------
Iteration 87
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 809+469 T 1278 [92m☑[0m 1278
Q 446+94  T 540  [92m☑[0m 540 
Q 928+97  T 1025 [92m☑[0

Q 280+55  T 335  [92m☑[0m 335 
Q 76+212  T 288  [92m☑[0m 288 
Q 1+843   T 844  [92m☑[0m 844 
Q 443+758 T 1201 [92m☑[0m 1201
Q 217+488 T 705  [92m☑[0m 705 
Q 305+6   T 311  [92m☑[0m 311 
Q 41+759  T 800  [92m☑[0m 800 
Q 51+692  T 743  [92m☑[0m 743 
Q 171+67  T 238  [92m☑[0m 238 
Q 935+236 T 1171 [92m☑[0m 1171

--------------------------------------------------
Iteration 100
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 339+41  T 380  [92m☑[0m 380 
Q 21+13   T 34   [92m☑[0m 34  
Q 467+65  T 532  [92m☑[0m 532 
Q 68+540  T 608  [92m☑[0m 608 
Q 77+336  T 413  [92m☑[0m 413 
Q 783+9   T 792  [92m☑[0m 792 
Q 191+610 T 801  [92m☑[0m 801 
Q 266+15  T 281  [92m☑[0m 281 
Q 534+684 T 1218 [92m☑[0m 1218
Q 79+72   T 151  [92m☑[0m 151 

--------------------------------------------------
Iteration 101
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 46+449  T 495  [92m☑[0m 495 
Q 62+16   T 78   [92m☑[0m 78  
Q 3+795   T 798  [92m☑

Q 48+841  T 889  [92m☑[0m 889 
Q 47+241  T 288  [92m☑[0m 288 
Q 6+360   T 366  [91m☒[0m 356 
Q 79+72   T 151  [92m☑[0m 151 
Q 244+88  T 332  [92m☑[0m 332 
Q 518+60  T 578  [92m☑[0m 578 
Q 75+853  T 928  [92m☑[0m 928 
Q 4+517   T 521  [91m☒[0m 520 
Q 23+35   T 58   [92m☑[0m 58  
Q 616+402 T 1018 [92m☑[0m 1018

--------------------------------------------------
Iteration 114
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 3+18    T 21   [92m☑[0m 21  
Q 564+828 T 1392 [92m☑[0m 1392
Q 585+64  T 649  [92m☑[0m 649 
Q 7+115   T 122  [92m☑[0m 122 
Q 920+57  T 977  [92m☑[0m 977 
Q 571+881 T 1452 [92m☑[0m 1452
Q 827+585 T 1412 [92m☑[0m 1412
Q 894+96  T 990  [92m☑[0m 990 
Q 790+7   T 797  [92m☑[0m 797 
Q 870+98  T 968  [92m☑[0m 968 

--------------------------------------------------
Iteration 115
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 914+113 T 1027 [92m☑[0m 1027
Q 837+187 T 1024 [92m☑[0m 1024
Q 11+785  T 796  [92m☑

Q 949+400 T 1349 [92m☑[0m 1349
Q 49+829  T 878  [92m☑[0m 878 
Q 424+635 T 1059 [92m☑[0m 1059
Q 180+129 T 309  [92m☑[0m 309 
Q 43+318  T 361  [92m☑[0m 361 
Q 699+2   T 701  [92m☑[0m 701 
Q 917+793 T 1710 [92m☑[0m 1710
Q 252+484 T 736  [92m☑[0m 736 
Q 3+250   T 253  [92m☑[0m 253 
Q 20+649  T 669  [92m☑[0m 669 

--------------------------------------------------
Iteration 128
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 996+73  T 1069 [92m☑[0m 1069
Q 167+4   T 171  [92m☑[0m 171 
Q 447+384 T 831  [92m☑[0m 831 
Q 375+100 T 475  [92m☑[0m 475 
Q 915+95  T 1010 [92m☑[0m 1010
Q 216+52  T 268  [92m☑[0m 268 
Q 130+781 T 911  [92m☑[0m 911 
Q 203+669 T 872  [92m☑[0m 872 
Q 509+1   T 510  [92m☑[0m 510 
Q 720+65  T 785  [92m☑[0m 785 

--------------------------------------------------
Iteration 129
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 799+56  T 855  [92m☑[0m 855 
Q 471+118 T 589  [92m☑[0m 589 
Q 2+695   T 697  [92m☑

Q 5+691   T 696  [92m☑[0m 696 
Q 15+177  T 192  [92m☑[0m 192 
Q 4+306   T 310  [92m☑[0m 310 
Q 835+53  T 888  [92m☑[0m 888 
Q 798+28  T 826  [92m☑[0m 826 
Q 58+535  T 593  [92m☑[0m 593 
Q 832+371 T 1203 [92m☑[0m 1203
Q 49+2    T 51   [92m☑[0m 51  
Q 588+115 T 703  [92m☑[0m 703 
Q 462+108 T 570  [92m☑[0m 570 

--------------------------------------------------
Iteration 142
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 563+555 T 1118 [92m☑[0m 1118
Q 9+805   T 814  [92m☑[0m 814 
Q 7+453   T 460  [92m☑[0m 460 
Q 56+773  T 829  [92m☑[0m 829 
Q 8+607   T 615  [92m☑[0m 615 
Q 309+235 T 544  [92m☑[0m 544 
Q 80+229  T 309  [92m☑[0m 309 
Q 699+440 T 1139 [92m☑[0m 1139
Q 973+396 T 1369 [92m☑[0m 1369
Q 34+108  T 142  [92m☑[0m 142 

--------------------------------------------------
Iteration 143
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 571+61  T 632  [92m☑[0m 632 
Q 67+195  T 262  [92m☑[0m 262 
Q 991+649 T 1640 [92m☑

Q 177+41  T 218  [92m☑[0m 218 
Q 71+98   T 169  [92m☑[0m 169 
Q 899+26  T 925  [92m☑[0m 925 
Q 530+702 T 1232 [92m☑[0m 1232
Q 14+90   T 104  [92m☑[0m 104 
Q 0+367   T 367  [92m☑[0m 367 
Q 701+512 T 1213 [92m☑[0m 1213
Q 313+31  T 344  [92m☑[0m 344 
Q 5+625   T 630  [92m☑[0m 630 
Q 508+0   T 508  [92m☑[0m 508 

--------------------------------------------------
Iteration 156
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 986+79  T 1065 [92m☑[0m 1065
Q 266+4   T 270  [92m☑[0m 270 
Q 88+89   T 177  [92m☑[0m 177 
Q 848+793 T 1641 [92m☑[0m 1641
Q 145+616 T 761  [92m☑[0m 761 
Q 248+715 T 963  [92m☑[0m 963 
Q 940+882 T 1822 [92m☑[0m 1822
Q 53+281  T 334  [92m☑[0m 334 
Q 657+88  T 745  [92m☑[0m 745 
Q 271+6   T 277  [92m☑[0m 277 

--------------------------------------------------
Iteration 157
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 41+803  T 844  [92m☑[0m 844 
Q 501+508 T 1009 [92m☑[0m 1009
Q 270+823 T 1093 [92m☑

Q 335+23  T 358  [92m☑[0m 358 
Q 306+868 T 1174 [92m☑[0m 1174
Q 193+5   T 198  [92m☑[0m 198 
Q 171+77  T 248  [92m☑[0m 248 
Q 682+98  T 780  [92m☑[0m 780 
Q 221+440 T 661  [92m☑[0m 661 
Q 802+26  T 828  [92m☑[0m 828 
Q 899+26  T 925  [92m☑[0m 925 
Q 26+42   T 68   [92m☑[0m 68  
Q 613+4   T 617  [92m☑[0m 617 

--------------------------------------------------
Iteration 170
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 556+979 T 1535 [92m☑[0m 1535
Q 764+7   T 771  [92m☑[0m 771 
Q 5+336   T 341  [92m☑[0m 341 
Q 291+14  T 305  [92m☑[0m 305 
Q 134+81  T 215  [92m☑[0m 215 
Q 32+394  T 426  [92m☑[0m 426 
Q 26+31   T 57   [92m☑[0m 57  
Q 482+352 T 834  [92m☑[0m 834 
Q 8+114   T 122  [92m☑[0m 122 
Q 799+132 T 931  [92m☑[0m 931 

--------------------------------------------------
Iteration 171
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 48+22   T 70   [92m☑[0m 70  
Q 48+10   T 58   [92m☑[0m 58  
Q 7+976   T 983  [92m☑

Q 45+53   T 98   [92m☑[0m 98  
Q 870+282 T 1152 [92m☑[0m 1152
Q 934+988 T 1922 [92m☑[0m 1922
Q 6+963   T 969  [92m☑[0m 969 
Q 234+37  T 271  [92m☑[0m 271 
Q 83+744  T 827  [92m☑[0m 827 
Q 5+168   T 173  [92m☑[0m 173 
Q 92+905  T 997  [92m☑[0m 997 
Q 7+712   T 719  [92m☑[0m 719 
Q 862+49  T 911  [92m☑[0m 911 

--------------------------------------------------
Iteration 184
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 793+8   T 801  [92m☑[0m 801 
Q 663+607 T 1270 [92m☑[0m 1270
Q 4+349   T 353  [92m☑[0m 353 
Q 991+570 T 1561 [92m☑[0m 1561
Q 63+745  T 808  [92m☑[0m 808 
Q 590+9   T 599  [92m☑[0m 599 
Q 278+143 T 421  [92m☑[0m 421 
Q 7+732   T 739  [92m☑[0m 739 
Q 536+79  T 615  [92m☑[0m 615 
Q 334+87  T 421  [92m☑[0m 421 

--------------------------------------------------
Iteration 185
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 35+632  T 667  [92m☑[0m 667 
Q 2+900   T 902  [92m☑[0m 902 
Q 246+136 T 382  [92m☑

Q 983+611 T 1594 [92m☑[0m 1594
Q 443+758 T 1201 [92m☑[0m 1201
Q 960+6   T 966  [92m☑[0m 966 
Q 35+94   T 129  [92m☑[0m 129 
Q 589+527 T 1116 [92m☑[0m 1116
Q 177+0   T 177  [92m☑[0m 177 
Q 887+782 T 1669 [92m☑[0m 1669
Q 427+8   T 435  [92m☑[0m 435 
Q 583+63  T 646  [92m☑[0m 646 
Q 514+871 T 1385 [92m☑[0m 1385

--------------------------------------------------
Iteration 198
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 387+76  T 463  [92m☑[0m 463 
Q 149+93  T 242  [92m☑[0m 242 
Q 3+18    T 21   [92m☑[0m 21  
Q 45+3    T 48   [92m☑[0m 48  
Q 852+628 T 1480 [92m☑[0m 1480
Q 424+75  T 499  [92m☑[0m 499 
Q 18+818  T 836  [92m☑[0m 836 
Q 151+396 T 547  [92m☑[0m 547 
Q 24+833  T 857  [92m☑[0m 857 
Q 55+11   T 66   [92m☑[0m 66  

--------------------------------------------------
Iteration 199
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 7+216   T 223  [92m☑[0m 223 
Q 401+49  T 450  [92m☑[0m 450 
Q 112+712 T 824  [92m☑

In [31]:
# Persist the trained model to an HDF5 file whose name records no_iter.
model_path = 'rnn_model_%siter.h5' % no_iter
model.save(model_path)