In [13]:
from __future__ import print_function
from keras.models import Sequential
from keras import layers
import numpy as np
from six.moves import range

In [3]:
class CharacterTable(object):
    """One-hot codec for a fixed set of characters.

    Given a collection of characters it can:
      * encode a string into a one-hot matrix (one row per character);
      * decode a one-hot matrix, or a sequence of class indices, back
        into a string.
    """

    def __init__(self, chars):
        """Build the character <-> index lookup tables.

        chars: iterable of characters. Duplicates are dropped and the
            remaining characters are sorted, which fixes the index order.
        """
        self.chars = sorted(set(chars))
        self.char_indices = {char: index for index, char in enumerate(self.chars)}
        self.indices_char = dict(enumerate(self.chars))

    def encode(self, C, num_rows):
        """Return the one-hot encoding of the string C.

        C: string to encode; every character must be in the table.
        num_rows: number of rows of the returned matrix. Rows beyond
            len(C) are left as all zeros.

        Returns a (num_rows, len(self.chars)) array of zeros and ones.
        """
        one_hot = np.zeros((num_rows, len(self.chars)))
        for row, char in enumerate(C):
            column = self.char_indices[char]
            one_hot[row, column] = 1

        return one_hot

    def decode(self, x, calc_argmax=True):
        """Turn an encoded sequence back into a string.

        x: either a matrix whose last axis holds per-character scores
            (calc_argmax=True) or an iterable of character indices
            (calc_argmax=False).
        calc_argmax: when true, take the argmax over the last axis of x
            to obtain the character indices first.
        """
        indices = x.argmax(axis=-1) if calc_argmax else x

        return ''.join([self.indices_char[index] for index in indices])
    
class colors:
    """ANSI terminal escape sequences used to colour printed text."""
    ok = '\033[92m'  # SGR 92: bright green foreground
    fail = '\033[91m'  # SGR 91: bright red foreground
    close = '\033[0m'  # SGR 0: reset all attributes

In [8]:
# Problem configuration.
TRAINING_SIZE = 50000  # number of unique addition questions to generate
DIGITS = 3             # maximum number of digits drawn for each operand
REVERSE = True         # store each padded question string reversed

# Longest possible question is 'ddd+ddd': DIGITS + 1 (the '+') + DIGITS.
MAXLEN = DIGITS + 1 + DIGITS

# Vocabulary: the ten digits, the plus sign, and a space used as padding.
chars = '0123456789+ '
ctable = CharacterTable(chars)

questions = []
expected = []
seen = set()
print("generating data...")


def f():
    """Return a random integer built from 1 to DIGITS random decimal digits.

    Defined once, outside the sampling loop, instead of being re-created
    as a lambda on every iteration.
    """
    return int(''.join(np.random.choice(list('0123456789'))
                       for _ in range(np.random.randint(1, DIGITS + 1))))


while len(questions) < TRAINING_SIZE:
    a, b = f(), f()

    # Skip questions already generated. Sorting the pair means a+b and
    # b+a count as the same question.
    key = tuple(sorted((a, b)))
    if key in seen:
        continue

    seen.add(key)

    # Right-pad the question with spaces to exactly MAXLEN characters.
    q = '{}+{}'.format(a, b)
    query = q.ljust(MAXLEN)

    # Right-pad the answer with spaces to DIGITS + 1 characters.
    ans = str(a + b).ljust(DIGITS + 1)

    if REVERSE:
        # Reversing turns e.g. '12+345 ' into ' 543+21'.
        query = query[::-1]

    questions.append(query)
    expected.append(ans)

print('Total addition questions: ', len(questions))

generating data...
Total addition questions:  50000


In [19]:
# Print the first three question/answer pairs as a quick sanity check.
for sample_index in range(3):
    print(questions[sample_index])
    print(expected[sample_index])

  266+6
668 
 396+01
703 
  3+257
755 


In [9]:
print('Vectorization...')

# One-hot tensors: (sample, position in string, character index).
# Use the builtin `bool` as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, and it always meant the builtin bool.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=bool)

# Encode every padded question into its slot of x.
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)

# Encode every padded answer into its slot of y.
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)


Vectorization...


In [10]:
# Shuffle inputs and targets with one shared random permutation so that
# each question stays aligned with its answer.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

# Hold out the last len(x) // 10 samples (about 10%) for validation.
split_at = len(x) - len(x) // 10
x_train, y_train = x[:split_at], y[:split_at]
x_val, y_val = x[split_at:], y[split_at:]

In [11]:
# Report the shapes of the input and target tensors for each split.
for heading, inputs, targets in (('Training Data:', x_train, y_train),
                                 ('Validation Data:', x_val, y_val)):
    print(heading)
    print(inputs.shape)
    print(targets.shape)

Training Data:
(45000, 7, 12)
(45000, 4, 12)
Validation Data:
(5000, 7, 12)
(5000, 4, 12)


In [14]:
# Recurrent layer class used for every recurrent layer of the model.
RNN = layers.LSTM
# Number of units in each recurrent layer.
HIDDEN_SIZE = 128
# Mini-batch size passed to model.fit.
BATCH_SIZE = 128
# Number of recurrent layers stacked after the RepeatVector layer.
LAYERS = 1

In [15]:
print('Build model...')
model = Sequential(
    # Encoder: reads the (MAXLEN, len(chars)) one-hot question and emits a
    # single HIDDEN_SIZE vector, which RepeatVector then copies once per
    # output position (DIGITS + 1 of them).
    [RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))),
     layers.RepeatVector(DIGITS + 1)]
    # Decoder: LAYERS stacked recurrent layers, each returning the full
    # sequence so that there is one output per position.
    + [RNN(HIDDEN_SIZE, return_sequences=True) for _ in range(LAYERS)]
    # Per-position softmax over the character vocabulary.
    + [layers.TimeDistributed(layers.Dense(len(chars), activation='softmax'))]
)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               72192     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 12)             1548      
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


In [21]:
# Train one epoch at a time so that a handful of validation predictions
# can be printed after every pass over the training data.
for iteration in range(1, 200):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))

    # Spot-check 10 randomly chosen validation samples.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Slice (rather than index) to keep the leading batch dimension.
        rowx, rowy = x_val[ind:ind + 1], y_val[ind:ind + 1]
        # `predict_classes` is deprecated and has been removed from newer
        # Keras releases. For a softmax output it is the argmax of
        # `predict` over the last axis, so compute that directly.
        preds = model.predict(rowx, verbose=0).argmax(axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        # Undo the input reversal so the question prints left to right.
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + ' O ' + colors.close, end=' ')
        else:
            print(colors.fail + ' X ' + colors.close, end=' ')

        print(guess)


--------------------------------------------------
Iteration 1
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 186+521 T 707  [91m X [0m 818 
Q 636+833 T 1469 [91m X [0m 1328
Q 490+239 T 729  [91m X [0m 714 
Q 5+19    T 24   [91m X [0m 11  
Q 238+988 T 1226 [91m X [0m 1218
Q 98+86   T 184  [91m X [0m 188 
Q 161+33  T 194  [91m X [0m 274 
Q 91+522  T 613  [91m X [0m 501 
Q 463+36  T 499  [91m X [0m 441 
Q 23+223  T 246  [91m X [0m 334 

--------------------------------------------------
Iteration 2
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 75+140  T 215  [91m X [0m 184 
Q 117+13  T 130  [91m X [0m 124 
Q 389+960 T 1349 [91m X [0m 1384
Q 938+55  T 993  [91m X [0m 900 
Q 653+638 T 1291 [91m X [0m 1366
Q 542+918 T 1460 [91m X [0m 1466
Q 575+57  T 632  [91m X [0m 544 
Q 599+13  T 612  [91m X [0m 566 
Q 922+932 T 1854 [91m X [0m 1780
Q 572+48  T 620  [91m X [0m 646 

--------------------------------------------------
It

Q 15+945  T 960  [91m X [0m 961 
Q 3+329   T 332  [92m O [0m 332 
Q 80+41   T 121  [92m O [0m 121 
Q 263+593 T 856  [92m O [0m 856 
Q 54+107  T 161  [92m O [0m 161 
Q 129+364 T 493  [92m O [0m 493 
Q 64+49   T 113  [92m O [0m 113 
Q 22+45   T 67   [91m X [0m 77  
Q 406+715 T 1121 [92m O [0m 1121
Q 714+60  T 774  [92m O [0m 774 

--------------------------------------------------
Iteration 16
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 70+59   T 129  [92m O [0m 129 
Q 83+71   T 154  [92m O [0m 154 
Q 740+12  T 752  [92m O [0m 752 
Q 574+366 T 940  [91m X [0m 930 
Q 8+3     T 11   [91m X [0m 12  
Q 81+735  T 816  [92m O [0m 816 
Q 9+366   T 375  [92m O [0m 375 
Q 763+9   T 772  [92m O [0m 772 
Q 76+894  T 970  [92m O [0m 970 
Q 783+55  T 838  [92m O [0m 838 

--------------------------------------------------
Iteration 17
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 58+17   T 75   [92m O [0m 75  
Q 29+30   T 59   

Q 758+12  T 770  [92m O [0m 770 
Q 716+7   T 723  [92m O [0m 723 
Q 48+71   T 119  [92m O [0m 119 
Q 923+7   T 930  [92m O [0m 930 
Q 13+436  T 449  [91m X [0m 459 
Q 49+627  T 676  [92m O [0m 676 
Q 780+51  T 831  [92m O [0m 831 
Q 860+20  T 880  [92m O [0m 880 
Q 704+722 T 1426 [92m O [0m 1426
Q 6+953   T 959  [92m O [0m 959 

--------------------------------------------------
Iteration 30
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 657+482 T 1139 [92m O [0m 1139
Q 368+6   T 374  [92m O [0m 374 
Q 902+95  T 997  [92m O [0m 997 
Q 9+885   T 894  [92m O [0m 894 
Q 43+919  T 962  [92m O [0m 962 
Q 604+566 T 1170 [92m O [0m 1170
Q 984+63  T 1047 [92m O [0m 1047
Q 37+141  T 178  [92m O [0m 178 
Q 276+463 T 739  [92m O [0m 739 
Q 85+70   T 155  [92m O [0m 155 

--------------------------------------------------
Iteration 31
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 1+762   T 763  [92m O [0m 763 
Q 922+933 T 1855 

Q 962+731 T 1693 [92m O [0m 1693
Q 468+859 T 1327 [92m O [0m 1327
Q 75+24   T 99   [92m O [0m 99  
Q 1+706   T 707  [92m O [0m 707 
Q 90+582  T 672  [92m O [0m 672 
Q 8+168   T 176  [92m O [0m 176 
Q 22+556  T 578  [92m O [0m 578 
Q 962+41  T 1003 [92m O [0m 1003
Q 465+7   T 472  [92m O [0m 472 
Q 495+773 T 1268 [92m O [0m 1268

--------------------------------------------------
Iteration 44
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 433+4   T 437  [92m O [0m 437 
Q 503+9   T 512  [92m O [0m 512 
Q 202+72  T 274  [92m O [0m 274 
Q 182+9   T 191  [92m O [0m 191 
Q 955+1   T 956  [92m O [0m 956 
Q 371+9   T 380  [92m O [0m 380 
Q 97+156  T 253  [92m O [0m 253 
Q 45+99   T 144  [92m O [0m 144 
Q 364+25  T 389  [92m O [0m 389 
Q 21+251  T 272  [92m O [0m 272 

--------------------------------------------------
Iteration 45
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 15+539  T 554  [92m O [0m 554 
Q 931+0   T 931  

Q 204+280 T 484  [92m O [0m 484 
Q 124+45  T 169  [92m O [0m 169 
Q 554+176 T 730  [92m O [0m 730 
Q 464+843 T 1307 [92m O [0m 1307
Q 39+622  T 661  [92m O [0m 661 
Q 57+663  T 720  [92m O [0m 720 
Q 345+893 T 1238 [91m X [0m 1138
Q 465+42  T 507  [92m O [0m 507 
Q 698+22  T 720  [92m O [0m 720 
Q 692+925 T 1617 [92m O [0m 1617

--------------------------------------------------
Iteration 58
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 753+61  T 814  [92m O [0m 814 
Q 54+109  T 163  [92m O [0m 163 
Q 446+893 T 1339 [92m O [0m 1339
Q 26+422  T 448  [92m O [0m 448 
Q 27+690  T 717  [92m O [0m 717 
Q 496+9   T 505  [92m O [0m 505 
Q 307+38  T 345  [92m O [0m 345 
Q 67+803  T 870  [92m O [0m 870 
Q 134+1   T 135  [92m O [0m 135 
Q 738+8   T 746  [92m O [0m 746 

--------------------------------------------------
Iteration 59
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
Q 675+2   T 677  [92m O [0m 677 
Q 767+526 T 1293 

KeyboardInterrupt: 