In [1]:
from __future__ import print_function
from keras.models import Sequential
from keras import layers
import numpy as np
from six.moves import range

Using TensorFlow backend.


In [2]:
class CharacterTable(object):
    """Two-way mapping between characters and one-hot rows.

    Given a set of characters:
    + Encode them to a one-hot integer representation
    + Decode the one-hot or integer representation to their character output
    + Decode a vector of probabilities to their character output
    """
    def __init__(self, chars):
        """Initialize character table.

        # Arguments
            chars: Characters that can appear in the input. Duplicates are
                dropped and the rest are sorted, so a character's index is
                its position in that sorted order.
        """
        self.chars = sorted(set(chars))
        self.char_indices = {c: i for i, c in enumerate(self.chars)}
        self.indices_char = dict(enumerate(self.chars))

    def encode(self, C, num_rows):
        """One-hot encode given string C.

        # Arguments
            C: string, to be encoded.
            num_rows: Number of rows in the returned one-hot encoding. This is
                used to keep the # of rows for each data the same.

        # Returns
            Array of shape `(num_rows, len(self.chars))`. Row `i` has a 1 in
            the column of character `C[i]`; rows past the end of `C` stay
            all-zero.

        # Raises
            IndexError: If `C` has more than `num_rows` characters.
            KeyError: If `C` contains a character that is not in the table.
        """
        # Fail early with a readable message instead of NumPy's generic
        # out-of-bounds error raised from inside the loop.
        if len(C) > num_rows:
            raise IndexError(
                'cannot encode {} characters into {} rows'.format(
                    len(C), num_rows))
        x = np.zeros((num_rows, len(self.chars)))
        for i, c in enumerate(C):
            x[i, self.char_indices[c]] = 1
        return x

    def decode(self, x, calc_argmax=True):
        """Decode the given vector or 2D array to their character output.

        # Arguments
            x: A vector or a 2D array of probabilities or one-hot representations;
                or a vector of character indices (used with `calc_argmax=False`).
            calc_argmax: Whether to find the character index with maximum
                probability, defaults to `True`.

        # Returns
            The decoded string: one character per row of a 2D input, a single
            character for a 1-D probability vector, or one character per
            entry when `x` already holds indices.
        """
        if calc_argmax:
            # argmax over a 1-D vector yields a scalar, which cannot be
            # iterated; promote it so a single distribution decodes to a
            # single character.
            x = np.atleast_1d(x.argmax(axis=-1))
        return ''.join(self.indices_char[index] for index in x)

In [3]:
class colors:
    """ANSI escape sequences for colouring text printed to a terminal."""
    # Start sequence wrapped around the mark shown for a correct guess.
    ok = '\033[92m'
    # Start sequence wrapped around the mark shown for a wrong guess.
    fail = '\033[91m'
    # Reset sequence; print it after `ok`/`fail` text to restore defaults.
    close = '\033[0m'

In [4]:
TRAINING_SIZE = 50000
DIGITS = 3
REVERSE = True

# Maximum length of input is 'int + int' (e.g., '345+678'). Maximum length of
# int is DIGITS.
MAXLEN = DIGITS + 1 + DIGITS

# All the numbers, plus sign and space for padding.
chars = '0123456789+ '
ctable = CharacterTable(chars)


def random_operand(max_digits):
    """Draw a random non-negative integer written with 1..max_digits digits.

    The digit count is drawn first, then each digit independently. Leading
    zeros are allowed and collapse under `int()`, so the result lies in
    `[0, 10 ** max_digits - 1]` and short numbers are over-represented.
    """
    num_digits = np.random.randint(1, max_digits + 1)
    return int(''.join(np.random.choice(list('0123456789'))
                       for _ in range(num_digits)))


# Questions are de-duplicated up to operand order, so only
# N * (N + 1) / 2 distinct ones exist for N = 10 ** DIGITS operands. Asking
# for more would make the rejection loop below spin forever.
num_operands = 10 ** DIGITS
max_questions = num_operands * (num_operands + 1) // 2
if TRAINING_SIZE > max_questions:
    raise ValueError(
        'TRAINING_SIZE={} exceeds the {} distinct questions available for '
        'DIGITS={}'.format(TRAINING_SIZE, max_questions, DIGITS))

questions = []
expected = []
seen = set()
print('Generating data...')
while len(questions) < TRAINING_SIZE:
    a, b = random_operand(DIGITS), random_operand(DIGITS)
    # Skip any addition questions we've already seen.
    # Also treat a+b and b+a as the same question (hence the sorting).
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    # Pad the data with spaces such that it is always MAXLEN.
    q = '{}+{}'.format(a, b)
    query = q.ljust(MAXLEN)
    # Answers can be of maximum size DIGITS + 1.
    ans = str(a + b).ljust(DIGITS + 1)
    if REVERSE:
        # Reverse the query, e.g., '12+345  ' becomes '  543+21'. (Note the
        # space used for padding.)
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total addition questions:', len(questions))

Generating data...
Total addition questions: 50000


In [5]:
print('Vectorization...')
# One one-hot matrix per question / answer, stored as booleans to keep the
# dataset small. The builtin `bool` is used as the dtype because the
# `np.bool` alias was deprecated in NumPy 1.20 and removed in 1.24.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


In [6]:
# Reorder inputs and targets with one shared random permutation so that
# every question stays aligned with its answer.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

In [15]:
# Keep the final tenth of the samples (rounded down) for validation and
# train on everything before it.
split_at = len(x) - len(x) // 10
x_train, x_val = x[:split_at], x[split_at:]
y_train, y_val = y[:split_at], y[split_at:]

# Report the shapes of both partitions.
for label, features, targets in (('Training Data:', x_train, y_train),
                                 ('Validation Data:', x_val, y_val)):
    print(label)
    print(features.shape)
    print(targets.shape)

# Sanity check: show the first training sample as a raw one-hot matrix and
# then decoded back to text (the cell's displayed value).
print(x_train[0])
ctable.decode(x_train[0])

Training Data:
(45000, 7, 12)
(45000, 4, 12)
Validation Data:
(5000, 7, 12)
(5000, 4, 12)
[[ True False False False False False False False False False False False]
 [False False False False False False  True False False False False False]
 [False False False False False False  True False False False False False]
 [False False False False False False  True False False False False False]
 [False  True False False False False False False False False False False]
 [False False False False False False  True False False False False False]
 [False False False False False False  True False False False False False]]


' 444+44'

In [8]:
# Hyper-parameters. RNN selects the recurrent layer class used for both the
# encoder and the decoder.
RNN = layers.LSTM
HIDDEN_SIZE = 128
BATCH_SIZE = 128
LAYERS = 1

print('Build model...')
model = Sequential()

# Encoder: read the whole (MAXLEN, len(chars)) question and summarise it as
# a single HIDDEN_SIZE vector. For variable-length inputs, use
# input_shape=(None, num_feature) instead.
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))

# Feed that summary vector to the decoder once per output position. The
# answer has at most DIGITS + 1 characters (with DIGITS=3 the largest sum is
# 999+999=1998), hence DIGITS + 1 repetitions.
model.add(layers.RepeatVector(DIGITS + 1))

# Decoder: LAYERS stacked recurrent layers. return_sequences=True makes each
# one emit an output for every time step, (num_samples, timesteps,
# output_dim), which is what the per-step classifier below consumes.
for _ in range(LAYERS):
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))

# Classifier: the same softmax dense layer is applied to every time step to
# choose one character per output position.
per_step_classifier = layers.Dense(len(chars), activation='softmax')
model.add(layers.TimeDistributed(per_step_classifier))

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               72192     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 12)             1548      
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Train one epoch at a time (199 iterations) and, after each epoch, print a
# few validation predictions so progress can be inspected by eye.
for iteration in range(1, 200):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    # Select 10 samples from the validation set at random so we can visualize
    # errors.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Slice rather than index so the leading batch dimension is kept.
        rowx, rowy = x_val[ind:ind + 1], y_val[ind:ind + 1]
        # `Sequential.predict_classes` is deprecated and absent from recent
        # Keras/TensorFlow releases; taking the argmax of the softmax
        # output over the character axis gives the same class indices.
        preds = model.predict(rowx, verbose=0).argmax(axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        # Questions were reversed for training; flip them back for display.
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 1
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 170+558 T 728  [91m☒[0m 107 
Q 793+606 T 1399 [91m☒[0m 100 
Q 654+11  T 665  [91m☒[0m 127 
Q 430+841 T 1271 [91m☒[0m 107 
Q 916+206 T 1122 [91m☒[0m 1007
Q 94+848  T 942  [91m☒[0m 100 
Q 31+271  T 302  [91m☒[0m 127 
Q 172+83  T 255  [91m☒[0m 127 
Q 11+122  T 133  [91m☒[0m 12  
Q 504+52  T 556  [91m☒[0m 127 

--------------------------------------------------
Iteration 2
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 42+684  T 726  [91m☒[0m 566 
Q 98+138  T 236  [91m☒[0m 199 
Q 263+0   T 263  [91m☒[0m 33  
Q 61+187  T 248  [91m☒[0m 122 
Q 398+165 T 563  [91m☒[0m 906 
Q 62+297  T 359  [91m☒[0m 322 
Q 59+175  T 234  [91m☒[0m 556 
Q 91+18   T 109  [91m☒[0m 110 
Q 805+141 T 946  [91m☒[0m 104 
Q 593+65  T 658  [91m☒[0m 696 

--------------------------------------------------
Iteration 3
Train on 45000 samples, valida

Q 70+884  T 954  [92m☑[0m 954 
Q 87+245  T 332  [92m☑[0m 332 
Q 403+226 T 629  [91m☒[0m 639 
Q 783+459 T 1242 [92m☑[0m 1242
Q 595+456 T 1051 [92m☑[0m 1051
Q 710+659 T 1369 [92m☑[0m 1369
Q 41+329  T 370  [91m☒[0m 360 
Q 14+95   T 109  [92m☑[0m 109 
Q 36+312  T 348  [92m☑[0m 348 
Q 559+138 T 697  [91m☒[0m 797 

--------------------------------------------------
Iteration 16
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 937+50  T 987  [92m☑[0m 987 
Q 797+61  T 858  [92m☑[0m 858 
Q 74+602  T 676  [92m☑[0m 676 
Q 909+173 T 1082 [92m☑[0m 1082
Q 697+2   T 699  [92m☑[0m 699 
Q 191+565 T 756  [92m☑[0m 756 
Q 860+88  T 948  [92m☑[0m 948 
Q 881+98  T 979  [92m☑[0m 979 
Q 449+560 T 1009 [92m☑[0m 1009
Q 612+40  T 652  [92m☑[0m 652 

--------------------------------------------------
Iteration 17
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 24+24   T 48   [92m☑[0m 48  
Q 16+73   T 89   [91m☒[0m 99  
Q 639+89  T 728  [92m☑[0

Q 723+531 T 1254 [92m☑[0m 1254
Q 903+693 T 1596 [92m☑[0m 1596
Q 51+319  T 370  [92m☑[0m 370 
Q 145+9   T 154  [92m☑[0m 154 
Q 94+574  T 668  [92m☑[0m 668 
Q 616+91  T 707  [92m☑[0m 707 
Q 681+2   T 683  [92m☑[0m 683 
Q 83+209  T 292  [92m☑[0m 292 
Q 64+273  T 337  [92m☑[0m 337 
Q 42+547  T 589  [92m☑[0m 589 

--------------------------------------------------
Iteration 30
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 101+866 T 967  [91m☒[0m 966 
Q 55+457  T 512  [92m☑[0m 512 
Q 5+696   T 701  [92m☑[0m 701 
Q 619+3   T 622  [92m☑[0m 622 
Q 702+899 T 1601 [92m☑[0m 1601
Q 7+19    T 26   [92m☑[0m 26  
Q 203+76  T 279  [92m☑[0m 279 
Q 50+164  T 214  [92m☑[0m 214 
Q 394+499 T 893  [91m☒[0m 883 
Q 615+95  T 710  [92m☑[0m 710 

--------------------------------------------------
Iteration 31
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 775+58  T 833  [92m☑[0m 833 
Q 218+0   T 218  [92m☑[0m 218 
Q 18+792  T 810  [92m☑[0

Q 85+42   T 127  [92m☑[0m 127 
Q 116+857 T 973  [92m☑[0m 973 
Q 84+225  T 309  [92m☑[0m 309 
Q 51+94   T 145  [92m☑[0m 145 
Q 293+640 T 933  [92m☑[0m 933 
Q 638+40  T 678  [92m☑[0m 678 
Q 200+722 T 922  [92m☑[0m 922 
Q 48+388  T 436  [92m☑[0m 436 
Q 617+0   T 617  [92m☑[0m 617 
Q 470+46  T 516  [92m☑[0m 516 

--------------------------------------------------
Iteration 44
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 443+7   T 450  [92m☑[0m 450 
Q 306+11  T 317  [92m☑[0m 317 
Q 81+486  T 567  [92m☑[0m 567 
Q 4+283   T 287  [92m☑[0m 287 
Q 817+894 T 1711 [92m☑[0m 1711
Q 61+636  T 697  [92m☑[0m 697 
Q 14+328  T 342  [92m☑[0m 342 
Q 64+230  T 294  [92m☑[0m 294 
Q 502+28  T 530  [92m☑[0m 530 
Q 94+801  T 895  [92m☑[0m 895 

--------------------------------------------------
Iteration 45
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 3+64    T 67   [92m☑[0m 67  
Q 698+46  T 744  [92m☑[0m 744 
Q 18+318  T 336  [92m☑[0

Q 881+41  T 922  [92m☑[0m 922 
Q 385+233 T 618  [92m☑[0m 618 
Q 193+42  T 235  [92m☑[0m 235 
Q 629+77  T 706  [92m☑[0m 706 
Q 4+528   T 532  [92m☑[0m 532 
Q 505+14  T 519  [92m☑[0m 519 
Q 45+507  T 552  [92m☑[0m 552 
Q 73+37   T 110  [92m☑[0m 110 
Q 33+293  T 326  [92m☑[0m 326 
Q 558+62  T 620  [92m☑[0m 620 

--------------------------------------------------
Iteration 58
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 19+80   T 99   [92m☑[0m 99  
Q 936+583 T 1519 [92m☑[0m 1519
Q 17+57   T 74   [92m☑[0m 74  
Q 646+41  T 687  [92m☑[0m 687 
Q 508+945 T 1453 [92m☑[0m 1453
Q 335+140 T 475  [92m☑[0m 475 
Q 63+408  T 471  [92m☑[0m 471 
Q 57+41   T 98   [92m☑[0m 98  
Q 18+82   T 100  [92m☑[0m 100 
Q 2+121   T 123  [92m☑[0m 123 

--------------------------------------------------
Iteration 59
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 43+762  T 805  [92m☑[0m 805 
Q 185+5   T 190  [92m☑[0m 190 
Q 54+442  T 496  [92m☑[0

Q 851+43  T 894  [92m☑[0m 894 
Q 3+389   T 392  [92m☑[0m 392 
Q 56+113  T 169  [92m☑[0m 169 
Q 435+24  T 459  [92m☑[0m 459 
Q 39+55   T 94   [92m☑[0m 94  
Q 786+1   T 787  [92m☑[0m 787 
Q 119+752 T 871  [92m☑[0m 871 
Q 6+904   T 910  [92m☑[0m 910 
Q 32+636  T 668  [92m☑[0m 668 
Q 354+83  T 437  [92m☑[0m 437 

--------------------------------------------------
Iteration 72
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 456+48  T 504  [92m☑[0m 504 
Q 42+693  T 735  [92m☑[0m 735 
Q 18+60   T 78   [92m☑[0m 78  
Q 86+75   T 161  [92m☑[0m 161 
Q 148+58  T 206  [92m☑[0m 206 
Q 429+5   T 434  [92m☑[0m 434 
Q 41+612  T 653  [92m☑[0m 653 
Q 583+682 T 1265 [92m☑[0m 1265
Q 40+84   T 124  [92m☑[0m 124 
Q 884+60  T 944  [92m☑[0m 944 

--------------------------------------------------
Iteration 73
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 98+656  T 754  [92m☑[0m 754 
Q 869+771 T 1640 [92m☑[0m 1640
Q 387+22  T 409  [92m☑[0

Q 858+587 T 1445 [92m☑[0m 1445
Q 84+865  T 949  [92m☑[0m 949 
Q 616+97  T 713  [92m☑[0m 713 
Q 95+15   T 110  [92m☑[0m 110 
Q 966+35  T 1001 [92m☑[0m 1001
Q 715+468 T 1183 [92m☑[0m 1183
Q 80+441  T 521  [92m☑[0m 521 
Q 566+66  T 632  [92m☑[0m 632 
Q 309+200 T 509  [92m☑[0m 509 
Q 32+267  T 299  [92m☑[0m 299 

--------------------------------------------------
Iteration 86
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 36+276  T 312  [92m☑[0m 312 
Q 33+604  T 637  [92m☑[0m 637 
Q 70+958  T 1028 [92m☑[0m 1028
Q 126+20  T 146  [92m☑[0m 146 
Q 233+1   T 234  [92m☑[0m 234 
Q 616+91  T 707  [92m☑[0m 707 
Q 189+24  T 213  [92m☑[0m 213 
Q 42+523  T 565  [92m☑[0m 565 
Q 912+388 T 1300 [92m☑[0m 1300
Q 21+15   T 36   [92m☑[0m 36  

--------------------------------------------------
Iteration 87
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 36+660  T 696  [92m☑[0m 696 
Q 95+0    T 95   [92m☑[0m 95  
Q 518+3   T 521  [92m☑[0

Q 56+113  T 169  [92m☑[0m 169 
Q 23+921  T 944  [92m☑[0m 944 
Q 17+259  T 276  [92m☑[0m 276 
Q 96+76   T 172  [92m☑[0m 172 
Q 611+334 T 945  [92m☑[0m 945 
Q 6+463   T 469  [92m☑[0m 469 
Q 307+63  T 370  [92m☑[0m 370 
Q 681+990 T 1671 [92m☑[0m 1671
Q 41+672  T 713  [92m☑[0m 713 
Q 201+369 T 570  [92m☑[0m 570 

--------------------------------------------------
Iteration 100
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 64+445  T 509  [92m☑[0m 509 
Q 773+4   T 777  [92m☑[0m 777 
Q 667+236 T 903  [92m☑[0m 903 
Q 918+370 T 1288 [92m☑[0m 1288
Q 810+24  T 834  [92m☑[0m 834 
Q 38+504  T 542  [92m☑[0m 542 
Q 40+37   T 77   [92m☑[0m 77  
Q 37+53   T 90   [92m☑[0m 90  
Q 68+70   T 138  [92m☑[0m 138 
Q 745+726 T 1471 [92m☑[0m 1471

--------------------------------------------------
Iteration 101
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 84+225  T 309  [92m☑[0m 309 
Q 354+36  T 390  [92m☑[0m 390 
Q 396+185 T 581  [92m☑

Q 798+570 T 1368 [92m☑[0m 1368
Q 676+85  T 761  [92m☑[0m 761 
Q 18+792  T 810  [92m☑[0m 810 
Q 526+21  T 547  [92m☑[0m 547 
Q 99+950  T 1049 [92m☑[0m 1049
Q 4+994   T 998  [92m☑[0m 998 
Q 14+328  T 342  [92m☑[0m 342 
Q 72+226  T 298  [92m☑[0m 298 
Q 657+783 T 1440 [92m☑[0m 1440
Q 670+386 T 1056 [92m☑[0m 1056

--------------------------------------------------
Iteration 114
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 609+5   T 614  [92m☑[0m 614 
Q 7+549   T 556  [92m☑[0m 556 
Q 7+689   T 696  [92m☑[0m 696 
Q 938+63  T 1001 [92m☑[0m 1001
Q 43+548  T 591  [92m☑[0m 591 
Q 24+277  T 301  [92m☑[0m 301 
Q 61+91   T 152  [92m☑[0m 152 
Q 503+870 T 1373 [92m☑[0m 1373
Q 661+764 T 1425 [92m☑[0m 1425
Q 5+18    T 23   [92m☑[0m 23  

--------------------------------------------------
Iteration 115
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 26+538  T 564  [92m☑[0m 564 
Q 475+1   T 476  [92m☑[0m 476 
Q 272+71  T 343  [92m☑

Q 411+96  T 507  [92m☑[0m 507 
Q 3+551   T 554  [92m☑[0m 554 
Q 764+3   T 767  [92m☑[0m 767 
Q 9+210   T 219  [92m☑[0m 219 
Q 544+7   T 551  [92m☑[0m 551 
Q 443+79  T 522  [92m☑[0m 522 
Q 140+43  T 183  [92m☑[0m 183 
Q 674+224 T 898  [92m☑[0m 898 
Q 671+77  T 748  [92m☑[0m 748 
Q 790+204 T 994  [92m☑[0m 994 

--------------------------------------------------
Iteration 128
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 461+582 T 1043 [92m☑[0m 1043
Q 430+83  T 513  [92m☑[0m 513 
Q 520+470 T 990  [92m☑[0m 990 
Q 97+83   T 180  [92m☑[0m 180 
Q 917+38  T 955  [92m☑[0m 955 
Q 612+54  T 666  [92m☑[0m 666 
Q 6+189   T 195  [92m☑[0m 195 
Q 2+602   T 604  [92m☑[0m 604 
Q 95+281  T 376  [92m☑[0m 376 
Q 1+20    T 21   [92m☑[0m 21  

--------------------------------------------------
Iteration 129
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 2+819   T 821  [92m☑[0m 821 
Q 44+162  T 206  [92m☑[0m 206 
Q 5+942   T 947  [92m☑

KeyboardInterrupt: 