In [1]:
from keras.models import Sequential
from keras import layers
import numpy as np
from six.moves import range

Using TensorFlow backend.


In [2]:
class colors:
    ok = '\033[92m'
    fail = '\033[91m'
    close = '\033[0m'


TRAINING_SIZE = 80000
DIGITS = 3
REVERSE = False
MAXLEN = DIGITS + 1 + DIGITS
chars = '0123456789+- '
RNN = layers.LSTM
HIDDEN_SIZE = 128
BATCH_SIZE = 128
LAYERS = 1

In [3]:
class CharacterTable(object):
    def __init__(self, chars):
        self.chars = sorted(set(chars))
        self.char_indices = dict((c, i) for i, c in enumerate(self.chars))
        self.indices_char = dict((i, c) for i, c in enumerate(self.chars))
    
    def encode(self, C, num_rows):
        x = np.zeros((num_rows, len(self.chars)))
        for i, c in enumerate(C):
            x[i, self.char_indices[c]] = 1
        return x
    
    def decode(self, x, calc_argmax=True):
        if calc_argmax:
            x = x.argmax(axis=-1)
        return "".join(self.indices_char[i] for i in x)

In [4]:
ctable = CharacterTable(chars)

In [5]:
print (ctable.chars)

[' ', '+', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9']


In [6]:
questions = []
expected = []
seen = set()
print('Generating data...')
while len(questions) < TRAINING_SIZE:
    f = lambda: int(''.join(np.random.choice(list('0123456789')) for i in range(np.random.randint(1, DIGITS + 1))))
    a, b = f(), f()
    a, b = max(a,b), min(a,b)
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    q = '{}-{}'.format(a, b)
    q1 = '{}+{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    query1 = q1 + ' ' * (MAXLEN - len(q))
    ans = str(a - b)
    ans1 = str(a + b)
    ans += ' ' * (DIGITS + 1 - len(ans))
    ans1 += ' ' * (DIGITS + 1 - len(ans))
    if REVERSE:
        query = query[::-1]
        query1 = query1[::-1]
    questions.append(query)
    questions.append(query1)
    expected.append(ans)
    expected.append(ans1)
print (questions[:5],expected[:5])

Generating data...
['658-2  ', '658+2  ', '71-8   ', '71+8   ', '641-4  '] ['656 ', '660', '63  ', '79', '637 ']


In [7]:
print('Vectorization...')
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=np.bool)
y = np.zeros((len(expected), DIGITS + 1, len(chars)), dtype=np.bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

print (x[:5],y[:5])

Vectorization...
[[[False False False False False False False False False  True False
   False False]
  [False False False False False False False False  True False False
   False False]
  [False False False False False False False False False False False
    True False]
  [False False  True False False False False False False False False
   False False]
  [False False False False False  True False False False False False
   False False]
  [ True False False False False False False False False False False
   False False]
  [ True False False False False False False False False False False
   False False]]

 [[False False False False False False False False False  True False
   False False]
  [False False False False False False False False  True False False
   False False]
  [False False False False False False False False False False False
    True False]
  [False  True False False False False False False False False False
   False False]
  [False False False False False  True False F

In [8]:
indices = np.arange(len(y))
np.random.shuffle(indices)
x = x[indices]
y = y[indices]

# train_test_split
train_x = x[:20000]
train_y = y[:20000]
test_x = x[20000:]
test_y = y[20000:]

split_at = len(train_x) - len(train_x) // 10

(x_train, x_val) = train_x[:split_at], train_x[split_at:]
(y_train, y_val) = train_y[:split_at], train_y[split_at:]

In [9]:
print('Build model...' )
model = Sequential()
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
model.add(layers.RepeatVector(DIGITS + 1))
for _ in range(LAYERS):
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))

model.add(layers.TimeDistributed(layers.Dense(len(chars))))
model.add(layers.Activation('softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

Build model...
Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               72704     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 13)             1677      
_________________________________________________________________
activation_1 (Activation)    (None, 4, 13)             0         
Total params: 205,965
Trainable params: 205,965
Non-trainable params: 0
_________________________________________________________________


In [10]:
for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        preds = model.predict_classes(rowx, verbose=0)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 0
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 927-6   T 921  [91m☒[0m 135 
Q 9-5     T 4    [91m☒[0m 3   
Q 266+6   T 272  [91m☒[0m 1333
Q 862+355 T 1217 [91m☒[0m 1333
Q 52+28   T 80   [91m☒[0m 1333
Q 44+2    T 46   [91m☒[0m 1333
Q 865+1   T 866  [91m☒[0m 1333
Q 275+60  T 335  [91m☒[0m 1333
Q 337+179 T 516  [91m☒[0m 1333
Q 752+388 T 1140 [91m☒[0m 1333

--------------------------------------------------
Iteration 1
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 139-72  T 67   [91m☒[0m 11  
Q 849-412 T 437  [91m☒[0m 110 
Q 790-43  T 747  [91m☒[0m 130 
Q 519-3   T 516  [91m☒[0m 139 
Q 213-24  T 189  [91m☒[0m 211 
Q 937+689 T 1626 [91m☒[0m 1177
Q 604+237 T 841  [91m☒[0m 1111
Q 739-234 T 505  [91m☒[0m 139 
Q 514-32  T 482  [91m☒[0m 111 
Q

Q 98-74   T 24   [91m☒[0m 71  
Q 824-233 T 591  [91m☒[0m 566 
Q 923-27  T 896  [91m☒[0m 816 
Q 36-29   T 7    [91m☒[0m 1   
Q 737+249 T 986  [91m☒[0m 1121
Q 981+804 T 1785 [91m☒[0m 1617
Q 776+134 T 910  [91m☒[0m 1099
Q 704-61  T 643  [91m☒[0m 619 
Q 398-169 T 229  [91m☒[0m 266 
Q 236+44  T 280  [91m☒[0m 364 

--------------------------------------------------
Iteration 14
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 754-235 T 519  [91m☒[0m 532 
Q 554+95  T 649  [91m☒[0m 6040
Q 130+72  T 202  [91m☒[0m 116 
Q 879+546 T 1425 [91m☒[0m 1366
Q 230-55  T 175  [91m☒[0m 133 
Q 74-59   T 15   [91m☒[0m 2   
Q 783-38  T 745  [91m☒[0m 738 
Q 941+56  T 997  [91m☒[0m 1013
Q 794-1   T 793  [91m☒[0m 778 
Q 145-60  T 85   [91m☒[0m 13  

--------------------------------------------------
Iteration 15
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 37-17   T 20   [91m☒[0m 2   
Q 485-4   T 481  [91m☒[0m 483 
Q 73-39   T 34   [91m☒[0

Q 689+78  T 767  [91m☒[0m 7553
Q 642+0   T 642  [92m☑[0m 642 
Q 901-295 T 606  [91m☒[0m 603 
Q 466+102 T 568  [91m☒[0m 677 
Q 39-9    T 30   [91m☒[0m 2   
Q 798+88  T 886  [91m☒[0m 8668
Q 110-56  T 54   [91m☒[0m 53  
Q 813+9   T 822  [91m☒[0m 828 
Q 817-5   T 812  [91m☒[0m 815 
Q 809+158 T 967  [91m☒[0m 9093

--------------------------------------------------
Iteration 28
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 381-5   T 376  [91m☒[0m 378 
Q 557-424 T 133  [91m☒[0m 14  
Q 426-333 T 93   [91m☒[0m 13  
Q 795-50  T 745  [91m☒[0m 733 
Q 566-230 T 336  [91m☒[0m 266 
Q 859+44  T 903  [91m☒[0m 9244
Q 490+4   T 494  [91m☒[0m 402 
Q 330+95  T 425  [91m☒[0m 408 
Q 519+323 T 842  [91m☒[0m 8666
Q 708-92  T 616  [91m☒[0m 628 

--------------------------------------------------
Iteration 29
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 55+24   T 79   [91m☒[0m 8776
Q 208+81  T 289  [91m☒[0m 207 
Q 811-39  T 772  [91m☒[0

Q 317+45  T 362  [91m☒[0m 363 
Q 453-29  T 424  [91m☒[0m 423 
Q 671-34  T 637  [91m☒[0m 636 
Q 414-54  T 360  [91m☒[0m 367 
Q 391-85  T 306  [91m☒[0m 312 
Q 108-74  T 34   [91m☒[0m 33  
Q 954-56  T 898  [91m☒[0m 999 
Q 364+116 T 480  [91m☒[0m 592 
Q 698-1   T 697  [91m☒[0m 695 
Q 172+31  T 203  [91m☒[0m 197 

--------------------------------------------------
Iteration 42
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 309+280 T 589  [91m☒[0m 404 
Q 662+95  T 757  [91m☒[0m 7530
Q 97+70   T 167  [91m☒[0m 1799
Q 270+32  T 302  [91m☒[0m 201 
Q 176+84  T 260  [91m☒[0m 257 
Q 974-227 T 747  [91m☒[0m 746 
Q 908-668 T 240  [91m☒[0m 222 
Q 438-1   T 437  [91m☒[0m 438 
Q 96+64   T 160  [91m☒[0m 1533
Q 804+11  T 815  [91m☒[0m 812 

--------------------------------------------------
Iteration 43
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 832-341 T 491  [91m☒[0m 588 
Q 791+32  T 823  [91m☒[0m 8203
Q 51-12   T 39   [92m☑[0

Q 443+0   T 443  [91m☒[0m 444 
Q 740-348 T 392  [91m☒[0m 476 
Q 265-82  T 183  [91m☒[0m 184 
Q 914+194 T 1108 [91m☒[0m 1125
Q 983-364 T 619  [92m☑[0m 619 
Q 454+198 T 652  [91m☒[0m 6520
Q 637-255 T 382  [91m☒[0m 481 
Q 74+2    T 76   [91m☒[0m 7596
Q 318+98  T 416  [91m☒[0m 419 
Q 143-98  T 45   [91m☒[0m 44  

--------------------------------------------------
Iteration 56
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 11+7    T 18   [91m☒[0m 161 
Q 483-81  T 402  [91m☒[0m 308 
Q 56-14   T 42   [92m☑[0m 42  
Q 737+249 T 986  [91m☒[0m 9976
Q 214-26  T 188  [92m☑[0m 188 
Q 327+2   T 329  [91m☒[0m 327 
Q 897-5   T 892  [91m☒[0m 893 
Q 158-96  T 62   [91m☒[0m 57  
Q 622-395 T 227  [91m☒[0m 226 
Q 780-121 T 659  [92m☑[0m 659 

--------------------------------------------------
Iteration 57
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 938+571 T 1509 [91m☒[0m 1597
Q 263-9   T 254  [91m☒[0m 255 
Q 564+1   T 565  [91m☒[0

Q 939-256 T 683  [92m☑[0m 683 
Q 417+69  T 486  [91m☒[0m 4866
Q 618+28  T 646  [91m☒[0m 647 
Q 486+0   T 486  [91m☒[0m 487 
Q 689-132 T 557  [92m☑[0m 557 
Q 174+14  T 188  [92m☑[0m 188 
Q 53-48   T 5    [92m☑[0m 5   
Q 675+3   T 678  [91m☒[0m 6789
Q 332-253 T 79   [91m☒[0m 17  
Q 979-345 T 634  [92m☑[0m 634 

--------------------------------------------------
Iteration 70
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 717-560 T 157  [91m☒[0m 165 
Q 657-96  T 561  [92m☑[0m 561 
Q 82+20   T 102  [91m☒[0m 1020
Q 230-61  T 169  [91m☒[0m 179 
Q 871-80  T 791  [92m☑[0m 791 
Q 174+14  T 188  [92m☑[0m 188 
Q 276+255 T 531  [91m☒[0m 432 
Q 83-36   T 47   [92m☑[0m 47  
Q 188-3   T 185  [91m☒[0m 174 
Q 988-17  T 971  [92m☑[0m 971 

--------------------------------------------------
Iteration 71
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 112+26  T 138  [91m☒[0m 137 
Q 86+22   T 108  [91m☒[0m 1075
Q 699+396 T 1095 [91m☒[0

Q 367+92  T 459  [92m☑[0m 459 
Q 811-6   T 805  [92m☑[0m 805 
Q 415-95  T 320  [92m☑[0m 320 
Q 804+11  T 815  [92m☑[0m 815 
Q 983+47  T 1030 [91m☒[0m 1029
Q 132-31  T 101  [91m☒[0m 102 
Q 858+90  T 948  [91m☒[0m 958 
Q 526+251 T 777  [91m☒[0m 7778
Q 626-5   T 621  [92m☑[0m 621 
Q 800-749 T 51   [91m☒[0m 3   

--------------------------------------------------
Iteration 84
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 346+272 T 618  [91m☒[0m 617 
Q 426-333 T 93   [91m☒[0m 11  
Q 936+233 T 1169 [91m☒[0m 1180
Q 82+71   T 153  [91m☒[0m 1532
Q 721-408 T 313  [92m☑[0m 313 
Q 932-451 T 481  [91m☒[0m 460 
Q 393+7   T 400  [92m☑[0m 400 
Q 589-543 T 46   [91m☒[0m 14  
Q 435-143 T 292  [92m☑[0m 292 
Q 72+11   T 83   [91m☒[0m 832 

--------------------------------------------------
Iteration 85
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 807+800 T 1607 [91m☒[0m 1579
Q 602-7   T 595  [92m☑[0m 595 
Q 184+3   T 187  [92m☑[0

Q 993-83  T 910  [91m☒[0m 900 
Q 490+4   T 494  [91m☒[0m 495 
Q 745+73  T 818  [92m☑[0m 818 
Q 148+1   T 149  [92m☑[0m 149 
Q 64-23   T 41   [92m☑[0m 41  
Q 413-3   T 410  [91m☒[0m 409 
Q 126+6   T 132  [92m☑[0m 132 
Q 574+69  T 643  [92m☑[0m 643 
Q 281-16  T 265  [92m☑[0m 265 
Q 107-97  T 10   [92m☑[0m 10  

--------------------------------------------------
Iteration 98
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 270+32  T 302  [91m☒[0m 312 
Q 70-54   T 16   [91m☒[0m 17  
Q 361+86  T 447  [91m☒[0m 4576
Q 346+272 T 618  [91m☒[0m 628 
Q 95-33   T 62   [92m☑[0m 62  
Q 992+1   T 993  [92m☑[0m 993 
Q 899-37  T 862  [91m☒[0m 863 
Q 557-69  T 488  [92m☑[0m 488 
Q 55+16   T 71   [91m☒[0m 715 
Q 928-4   T 924  [91m☒[0m 925 

--------------------------------------------------
Iteration 99
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 469+357 T 826  [91m☒[0m 8165
Q 801-427 T 374  [91m☒[0m 364 
Q 264+8   T 272  [92m☑[0

In [11]:
print("MSG : Prediction")
test_x = ["555-275", "860-7  ", "340-29 ", "555+275", "860+7  ", "340+29 "]
test_y = ["280 ", "853 ", "311 ", "830 ", "867 ", "369 "]
x = np.zeros((len(test_x), MAXLEN, len(chars)), dtype=np.bool)
y = np.zeros((len(test_y), DIGITS + 1, len(chars)), dtype=np.bool)
for j, (i, c) in enumerate(zip(test_x, test_y)):
    x[j] = ctable.encode(i, MAXLEN)
    y[j] = ctable.encode(c, DIGITS + 1)

MSG : Prediction


In [12]:
right = 0
preds = model.predict_classes(x, verbose=0)
for i in range(len(preds)):
    q = ctable.decode(x[i])
    correct = ctable.decode(y[i])
    guess = ctable.decode(preds[i], calc_argmax=False)
    print('Q', q[::-1] if REVERSE else q, end=' ')
    print('T', correct, end=' ')
    if correct == guess:
        print(colors.ok + '☑' + colors.close, end=' ')
        right += 1
    else:
        print(colors.fail + '☒' + colors.close, end=' ')
    print(guess)
print("MSG : Accuracy is {}".format(right / len(preds)))

Q 555-275 T 280  [92m☑[0m 280 
Q 860-7   T 853  [92m☑[0m 853 
Q 340-29  T 311  [92m☑[0m 311 
Q 555+275 T 830  [91m☒[0m 8300
Q 860+7   T 867  [91m☒[0m 7676
Q 340+29  T 369  [92m☑[0m 369 
MSG : Accuracy is 0.6666666666666666
