In [3]:
from keras.models import Sequential
from keras import layers
import numpy as np
from six.moves import range

Using TensorFlow backend.


# Parameters Config

In [4]:
class colors:
    """ANSI escape sequences used to colour the per-sample result marks."""
    # Bright green: wraps the mark printed for a correct guess.
    ok = '\033[92m'
    # Bright red: wraps the mark printed for a wrong guess.
    fail = '\033[91m'
    # Reset: restores the terminal's default colour.
    close = '\033[0m'

In [5]:
# Seed for NumPy's global RNG. Data generation, shuffling and the spot-check
# sampling in the training loop all draw from np.random, so fixing it makes
# those steps repeatable under Restart & Run All.
# NOTE(review): this does not seed the Keras backend's weight initialisation.
SEED = 42
np.random.seed(SEED)

# Number of unique addition questions to generate.
TRAINING_SIZE = 80000
# Maximum number of digits in each operand.
DIGITS = 3
# When True, question strings are reversed before being encoded.
REVERSE = False
# Longest possible question: DIGITS + '+' + DIGITS characters.
MAXLEN = DIGITS + 1 + DIGITS
# Vocabulary: the ten digits, the plus sign and the space used for padding.
chars = '0123456789+ '
# Recurrent layer class used for both the encoder and the decoder.
RNN = layers.LSTM
# Number of units in each recurrent layer.
HIDDEN_SIZE = 128
# Mini-batch size passed to model.fit.
BATCH_SIZE = 128
# Number of recurrent layers stacked in the decoder.
LAYERS = 1

In [6]:
class CharacterTable(object):
    """Translate between strings and one-hot matrices over a fixed vocabulary.

    The vocabulary is the sorted set of distinct characters passed to the
    constructor; a character's column index is its position in that order.
    """

    def __init__(self, chars):
        """Build the character <-> index lookup tables.

        Args:
            chars: iterable of characters that may appear in the strings.
        """
        self.chars = sorted(set(chars))
        self.char_indices = {ch: idx for idx, ch in enumerate(self.chars)}
        self.indices_char = dict(enumerate(self.chars))

    def encode(self, C, num_rows):
        """One-hot encode the string ``C``.

        Args:
            C: string to encode; every character must be in the vocabulary.
            num_rows: number of rows in the returned matrix.

        Returns:
            Array of shape ``(num_rows, len(self.chars))`` where row ``i``
            has a 1 in the column of ``C[i]``; rows past ``len(C)`` stay zero.

        Raises:
            KeyError: if ``C`` contains a character outside the vocabulary.
            IndexError: if ``C`` is longer than ``num_rows``.
        """
        one_hot = np.zeros((num_rows, len(self.chars)))
        for row, ch in enumerate(C):
            column = self.char_indices[ch]
            one_hot[row, column] = 1
        return one_hot

    def decode(self, x, calc_argmax=True):
        """Turn a one-hot/probability matrix or an index sequence into a string.

        Args:
            x: array whose last axis ranges over the vocabulary when
                ``calc_argmax`` is True, otherwise an iterable of indices.
            calc_argmax: take ``argmax`` over the last axis before the lookup.

        Returns:
            The decoded string, one character per index.
        """
        indices = x.argmax(axis=-1) if calc_argmax else x
        return "".join(self.indices_char[idx] for idx in indices)

In [7]:
# Shared encoder/decoder built over the vocabulary string `chars`.
ctable = CharacterTable(chars)

In [8]:
# Display the index -> character mapping held by the table.
ctable.indices_char

{0: ' ',
 1: '+',
 2: '0',
 3: '1',
 4: '2',
 5: '3',
 6: '4',
 7: '5',
 8: '6',
 9: '7',
 10: '8',
 11: '9'}

# Data Generation

In [9]:
def _random_operand():
    """Draw a non-negative integer built from 1 to DIGITS random decimal digits."""
    n_digits = np.random.randint(1, DIGITS + 1)
    digits = ''.join(np.random.choice(list('0123456789')) for _ in range(n_digits))
    return int(digits)


questions = []
expected = []
seen = set()
print('Generating data...')
while len(questions) < TRAINING_SIZE:
    a, b = _random_operand(), _random_operand()
    # a+b and b+a count as the same question, so key on the ordered pair.
    key = (min(a, b), max(a, b))
    if key not in seen:
        seen.add(key)
        # Pad the question to MAXLEN and the answer to DIGITS + 1 with spaces.
        query = '{}+{}'.format(a, b).ljust(MAXLEN)
        ans = str(a + b).ljust(DIGITS + 1)
        questions.append(query[::-1] if REVERSE else query)
        expected.append(ans)
print('Total addition questions:', len(questions))

Generating data...
Total addition questions: 80000


In [10]:
# Show the first five generated questions next to their expected answers.
print(questions[:5], expected[:5])

['59+8   ', '8+9    ', '74+8   ', '5+37   ', '1+164  '] ['67  ', '17  ', '82  ', '42  ', '165 ']


# Processing

In [11]:
print('Vectorization...')
# One-hot encode every question into x and every answer into y.
# The dtype is the builtin `bool`: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(expected), DIGITS + 1, len(chars)), dtype=bool)
for i, sentence in enumerate(questions):
    # Each question fills one (MAXLEN, len(chars)) slice.
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    # Each answer fills one (DIGITS + 1, len(chars)) slice.
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


In [12]:
# Shuffle questions and answers with one shared permutation so pairs stay aligned.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

# train_test_split: the first 20000 samples are for training, the rest for testing.
n_train_total = 20000
train_x, test_x = x[:n_train_total], x[n_train_total:]
train_y, test_y = y[:n_train_total], y[n_train_total:]

# Hold out the last tenth of the training samples for validation.
split_at = len(train_x) - len(train_x) // 10
x_train, x_val = train_x[:split_at], train_x[split_at:]
y_train, y_val = train_y[:split_at], train_y[split_at:]

# Report the shape of every split.
for title, (inputs, labels) in (
        ('Training Data:', (x_train, y_train)),
        ('Validation Data:', (x_val, y_val)),
        ('Testing Data:', (test_x, test_y))):
    print(title)
    print(inputs.shape)
    print(labels.shape)

Training Data:
(18000, 7, 12)
(18000, 4, 12)
Validation Data:
(2000, 7, 12)
(2000, 4, 12)
Testing Data:
(60000, 7, 12)
(60000, 4, 12)


In [16]:
# Print the first three encoded training inputs and their encoded labels.
print("input: ", x_train[:3], '\n\n', "label: ", y_train[:3])

input:  [[[False False False False False False False False False False  True
   False]
  [False False False False False False False False False  True False
   False]
  [False False False False False False False  True False False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False False False False False False False  True
   False]
  [False False False False False False False False  True False False
   False]
  [False False False False False False False False False False False
    True]]

 [[False False False  True False False False False False False False
   False]
  [False False False False False False False False  True False False
   False]
  [False False False False False False False False False  True False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False  True False False False False False False
   False]
  [False False  True False False False Fal

# Build Model

In [14]:
print('Build model...')

# Encoder: read the one-hot question and summarise it in one HIDDEN_SIZE vector,
# then repeat that vector once per output position.
encoder_layers = [
    RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))),
    layers.RepeatVector(DIGITS + 1),
]

# Decoder: LAYERS stacked recurrent layers, each returning the full sequence.
decoder_layers = [RNN(HIDDEN_SIZE, return_sequences=True) for _ in range(LAYERS)]

# Output head: a per-timestep dense layer over the vocabulary, then softmax.
output_layers = [
    layers.TimeDistributed(layers.Dense(len(chars))),
    layers.Activation('softmax'),
]

model = Sequential(encoder_layers + decoder_layers + output_layers)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               72192     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 12)             1548      
_________________________________________________________________
activation_1 (Activation)    (None, 4, 12)             0         
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


# Training

In [15]:
# Train one epoch at a time; after each epoch, spot-check ten random
# validation questions and print the model's guess beside the true answer.
for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Index with a one-element array to keep the leading batch axis.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # argmax over the vocabulary axis gives one class index per output
        # position. This replaces Sequential.predict_classes, which was
        # removed in TensorFlow 2.6 and returned the same argmax for a
        # multi-class output.
        preds = model.predict(rowx, verbose=0).argmax(axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        # preds already holds indices, so skip the argmax inside decode.
        guess = ctable.decode(preds[0], calc_argmax=False)
        # Undo the input reversal so the question reads naturally.
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 0
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 542+6   T 548  [91m☒[0m 10  
Q 582+11  T 593  [91m☒[0m 100 
Q 357+17  T 374  [91m☒[0m 100 
Q 646+716 T 1362 [91m☒[0m 100 
Q 142+5   T 147  [91m☒[0m 10  
Q 43+638  T 681  [91m☒[0m 100 
Q 450+20  T 470  [91m☒[0m 100 
Q 578+36  T 614  [91m☒[0m 100 
Q 3+616   T 619  [91m☒[0m 10  
Q 523+714 T 1237 [91m☒[0m 100 

--------------------------------------------------
Iteration 1
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 975+97  T 1072 [91m☒[0m 100 
Q 2+217   T 219  [91m☒[0m 82  
Q 386+8   T 394  [91m☒[0m 155 
Q 633+394 T 1027 [91m☒[0m 101 
Q 214+1   T 215  [91m☒[0m 52  
Q 46+956  T 1002 [91m☒[0m 105 
Q 299+176 T 475  [91m☒[0m 1111
Q 28+45   T 73   [91m☒[0m 85  
Q 922+971 T 1893 [91m☒[0m 1111
Q 149+94  T 243  [91m☒[0m 100 

--------------------------------------------------
Iteration 2
Train on 18000 samples, valida

Q 491+85  T 576  [91m☒[0m 568 
Q 2+492   T 494  [91m☒[0m 498 
Q 927+42  T 969  [91m☒[0m 975 
Q 600+11  T 611  [91m☒[0m 622 
Q 949+56  T 1005 [91m☒[0m 1020
Q 554+99  T 653  [91m☒[0m 643 
Q 81+513  T 594  [91m☒[0m 581 
Q 0+246   T 246  [91m☒[0m 259 
Q 749+3   T 752  [91m☒[0m 138 
Q 967+67  T 1034 [91m☒[0m 1042

--------------------------------------------------
Iteration 15
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 435+781 T 1216 [91m☒[0m 1208
Q 517+492 T 1009 [91m☒[0m 1034
Q 420+31  T 451  [91m☒[0m 442 
Q 742+706 T 1448 [91m☒[0m 1484
Q 800+946 T 1746 [91m☒[0m 1737
Q 21+329  T 350  [91m☒[0m 342 
Q 77+858  T 935  [91m☒[0m 942 
Q 88+215  T 303  [91m☒[0m 308 
Q 241+316 T 557  [91m☒[0m 542 
Q 890+2   T 892  [91m☒[0m 933 

--------------------------------------------------
Iteration 16
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 764+25  T 789  [91m☒[0m 797 
Q 736+25  T 761  [91m☒[0m 777 
Q 17+447  T 464  [91m☒[0

Q 249+50  T 299  [91m☒[0m 290 
Q 61+423  T 484  [92m☑[0m 484 
Q 391+56  T 447  [91m☒[0m 446 
Q 452+59  T 511  [91m☒[0m 510 
Q 347+82  T 429  [91m☒[0m 428 
Q 93+899  T 992  [92m☑[0m 992 
Q 507+276 T 783  [91m☒[0m 704 
Q 683+2   T 685  [92m☑[0m 685 
Q 0+916   T 916  [91m☒[0m 906 
Q 611+333 T 944  [92m☑[0m 944 

--------------------------------------------------
Iteration 29
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 44+843  T 887  [92m☑[0m 887 
Q 73+204  T 277  [92m☑[0m 277 
Q 570+419 T 989  [91m☒[0m 999 
Q 63+310  T 373  [92m☑[0m 373 
Q 7+785   T 792  [91m☒[0m 782 
Q 254+77  T 331  [92m☑[0m 331 
Q 436+325 T 761  [92m☑[0m 761 
Q 234+744 T 978  [91m☒[0m 987 
Q 80+46   T 126  [91m☒[0m 137 
Q 80+514  T 594  [92m☑[0m 594 

--------------------------------------------------
Iteration 30
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 42+20   T 62   [92m☑[0m 62  
Q 160+174 T 334  [91m☒[0m 435 
Q 459+228 T 687  [91m☒[0

Q 18+953  T 971  [92m☑[0m 971 
Q 933+887 T 1820 [92m☑[0m 1820
Q 570+419 T 989  [92m☑[0m 989 
Q 780+182 T 962  [92m☑[0m 962 
Q 1+676   T 677  [91m☒[0m 678 
Q 562+310 T 872  [91m☒[0m 871 
Q 229+438 T 667  [92m☑[0m 667 
Q 3+616   T 619  [92m☑[0m 619 
Q 842+779 T 1621 [91m☒[0m 1611
Q 61+423  T 484  [92m☑[0m 484 

--------------------------------------------------
Iteration 43
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 998+2   T 1000 [91m☒[0m 990 
Q 23+944  T 967  [92m☑[0m 967 
Q 495+873 T 1368 [92m☑[0m 1368
Q 64+360  T 424  [92m☑[0m 424 
Q 57+271  T 328  [92m☑[0m 328 
Q 268+34  T 302  [92m☑[0m 302 
Q 65+90   T 155  [92m☑[0m 155 
Q 928+11  T 939  [92m☑[0m 939 
Q 659+786 T 1445 [92m☑[0m 1445
Q 207+156 T 363  [91m☒[0m 373 

--------------------------------------------------
Iteration 44
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 25+612  T 637  [92m☑[0m 637 
Q 139+27  T 166  [92m☑[0m 166 
Q 396+915 T 1311 [91m☒[0

Q 322+653 T 975  [92m☑[0m 975 
Q 264+972 T 1236 [92m☑[0m 1236
Q 560+517 T 1077 [92m☑[0m 1077
Q 253+768 T 1021 [91m☒[0m 1011
Q 68+920  T 988  [92m☑[0m 988 
Q 276+397 T 673  [92m☑[0m 673 
Q 911+352 T 1263 [92m☑[0m 1263
Q 881+738 T 1619 [91m☒[0m 1610
Q 611+74  T 685  [92m☑[0m 685 
Q 38+767  T 805  [92m☑[0m 805 

--------------------------------------------------
Iteration 57
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 67+64   T 131  [92m☑[0m 131 
Q 69+57   T 126  [92m☑[0m 126 
Q 857+813 T 1670 [91m☒[0m 1660
Q 67+64   T 131  [92m☑[0m 131 
Q 0+957   T 957  [92m☑[0m 957 
Q 266+17  T 283  [92m☑[0m 283 
Q 247+853 T 1100 [92m☑[0m 1100
Q 66+853  T 919  [91m☒[0m 929 
Q 760+923 T 1683 [92m☑[0m 1683
Q 535+44  T 579  [92m☑[0m 579 

--------------------------------------------------
Iteration 58
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 261+148 T 409  [92m☑[0m 409 
Q 434+818 T 1252 [92m☑[0m 1252
Q 706+45  T 751  [92m☑[0

Q 793+890 T 1683 [92m☑[0m 1683
Q 547+936 T 1483 [92m☑[0m 1483
Q 642+975 T 1617 [92m☑[0m 1617
Q 734+290 T 1024 [92m☑[0m 1024
Q 71+678  T 749  [92m☑[0m 749 
Q 7+84    T 91   [92m☑[0m 91  
Q 303+49  T 352  [92m☑[0m 352 
Q 830+0   T 830  [91m☒[0m 831 
Q 9+449   T 458  [92m☑[0m 458 
Q 86+519  T 605  [92m☑[0m 605 

--------------------------------------------------
Iteration 71
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 970+54  T 1024 [92m☑[0m 1024
Q 570+419 T 989  [92m☑[0m 989 
Q 7+785   T 792  [92m☑[0m 792 
Q 963+175 T 1138 [92m☑[0m 1138
Q 92+119  T 211  [92m☑[0m 211 
Q 83+168  T 251  [92m☑[0m 251 
Q 89+739  T 828  [92m☑[0m 828 
Q 554+99  T 653  [92m☑[0m 653 
Q 58+981  T 1039 [92m☑[0m 1039
Q 794+703 T 1497 [91m☒[0m 1597

--------------------------------------------------
Iteration 72
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 465+269 T 734  [92m☑[0m 734 
Q 92+849  T 941  [92m☑[0m 941 
Q 761+655 T 1416 [92m☑[0

Q 759+155 T 914  [92m☑[0m 914 
Q 836+43  T 879  [92m☑[0m 879 
Q 546+133 T 679  [92m☑[0m 679 
Q 93+75   T 168  [92m☑[0m 168 
Q 94+576  T 670  [92m☑[0m 670 
Q 745+172 T 917  [92m☑[0m 917 
Q 967+67  T 1034 [92m☑[0m 1034
Q 817+559 T 1376 [92m☑[0m 1376
Q 354+695 T 1049 [92m☑[0m 1049
Q 910+15  T 925  [92m☑[0m 925 

--------------------------------------------------
Iteration 85
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 91+389  T 480  [91m☒[0m 470 
Q 36+146  T 182  [92m☑[0m 182 
Q 882+554 T 1436 [92m☑[0m 1436
Q 427+77  T 504  [92m☑[0m 504 
Q 897+35  T 932  [92m☑[0m 932 
Q 40+6    T 46   [92m☑[0m 46  
Q 176+8   T 184  [92m☑[0m 184 
Q 538+32  T 570  [92m☑[0m 570 
Q 716+133 T 849  [92m☑[0m 849 
Q 78+611  T 689  [92m☑[0m 689 

--------------------------------------------------
Iteration 86
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 992+344 T 1336 [92m☑[0m 1336
Q 1+885   T 886  [92m☑[0m 886 
Q 351+2   T 353  [92m☑[0

Q 578+36  T 614  [92m☑[0m 614 
Q 66+853  T 919  [91m☒[0m 929 
Q 558+92  T 650  [92m☑[0m 650 
Q 18+742  T 760  [92m☑[0m 760 
Q 23+12   T 35   [92m☑[0m 35  
Q 842+77  T 919  [91m☒[0m 929 
Q 378+38  T 416  [92m☑[0m 416 
Q 344+875 T 1219 [92m☑[0m 1219
Q 569+38  T 607  [92m☑[0m 607 
Q 4+579   T 583  [92m☑[0m 583 

--------------------------------------------------
Iteration 99
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 108+25  T 133  [92m☑[0m 133 
Q 882+554 T 1436 [92m☑[0m 1436
Q 186+667 T 853  [92m☑[0m 853 
Q 29+171  T 200  [92m☑[0m 200 
Q 853+391 T 1244 [92m☑[0m 1244
Q 194+632 T 826  [92m☑[0m 826 
Q 546+133 T 679  [92m☑[0m 679 
Q 111+86  T 197  [92m☑[0m 197 
Q 247+81  T 328  [92m☑[0m 328 
Q 234+43  T 277  [92m☑[0m 277 


In [None]:
print("MSG : Prediction")
# Hand-written questions and answers, already space-padded to MAXLEN and
# DIGITS + 1 characters respectively.
raw_questions = ["555+275", "860+7  ", "340+29 "]
raw_answers = ["830 ", "867 ", "369 "]

# Bind the ENCODED arrays to test_x / test_y: the evaluation cell calls the
# model on test_x and decodes test_x[i] / test_y[i], so both must be one-hot
# arrays rather than raw strings. The training arrays x / y are deliberately
# left untouched.
# The dtype is the builtin `bool` because the `np.bool` alias was removed in
# NumPy 1.24.
test_x = np.zeros((len(raw_questions), MAXLEN, len(chars)), dtype=bool)
test_y = np.zeros((len(raw_answers), DIGITS + 1, len(chars)), dtype=bool)
for j, (question, answer) in enumerate(zip(raw_questions, raw_answers)):
    # Mirror data generation: questions are reversed when REVERSE is set.
    if REVERSE:
        question = question[::-1]
    test_x[j] = ctable.encode(question, MAXLEN)
    test_y[j] = ctable.encode(answer, DIGITS + 1)

# Evaluation on the Test Data

In [None]:
# Count exact-match answers over test_x / test_y and report the accuracy.
right = 0
# argmax over the vocabulary axis gives one class index per output position.
# This replaces Sequential.predict_classes, which was removed in
# TensorFlow 2.6 and returned the same argmax for a multi-class output.
preds = model.predict(test_x, verbose=0).argmax(axis=-1)
for i in range(len(preds)):
    q = ctable.decode(test_x[i])
    correct = ctable.decode(test_y[i])
    # preds already holds indices, so skip the argmax inside decode.
    guess = ctable.decode(preds[i], calc_argmax=False)
    # Undo the input reversal so the question reads naturally.
    print('Q', q[::-1] if REVERSE else q, end=' ')
    print('T', correct, end=' ')
    if correct == guess:
        print(colors.ok + '☑' + colors.close, end=' ')
        right += 1
    else:
        print(colors.fail + '☒' + colors.close, end=' ')
    print(guess)
print("MSG : Accuracy is {}".format(right / len(preds)))