In [1]:
import numpy as np
from keras.models import Model, Sequential
from keras.layers import Input, LSTM, Dense, RNN
from keras import layers

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
class colors:
    """ANSI escape sequences wrapped around the per-sample result marks."""
    ok = '\033[92m'     # printed before the mark when the guess equals the target
    fail = '\033[91m'   # printed before the mark when the guess differs from the target
    close = '\033[0m'   # printed after the mark to end the colouring

In [3]:
# Number of unique addition questions to generate.
TRAINING_SIZE = 80000
# Maximum number of digits in each operand.
DIGITS = 3
# When True, each padded query string is reversed before it is stored.
REVERSE = False
# Padded query length: two operands of up to DIGITS digits plus the '+' sign.
MAXLEN = DIGITS + 1 + DIGITS
# Alphabet for the one-hot encoding: digits, '+', and the space used for padding.
chars = '0123456789+ '
# NOTE: this rebinds the name `RNN` (also imported from keras.layers) to the LSTM layer class.
RNN = layers.LSTM
# Units in each recurrent layer.
HIDDEN_SIZE = 128
# Mini-batch size passed to model.fit.
BATCH_SIZE = 128
# Number of recurrent layers stacked after the RepeatVector.
LAYERS = 1

### one-hot encoding converter

In [4]:
class CharacterTable(object):
    """Bidirectional mapping between characters and one-hot rows.

    The alphabet is the sorted set of distinct characters passed to the
    constructor; a character's position in that sorted list is its index.
    """

    def __init__(self, chars):
        """Build the lookup tables from an iterable of characters."""
        self.chars = sorted(set(chars))
        self.char_indices = {ch: idx for idx, ch in enumerate(self.chars)}
        self.indices_char = {idx: ch for idx, ch in enumerate(self.chars)}

    def encode(self, C, num_rows):
        """One-hot encode the string ``C`` into a ``(num_rows, len(self.chars))`` array.

        Row ``k`` holds a single 1 in the column of ``C[k]``; rows beyond
        ``len(C)`` stay all-zero.
        """
        one_hot = np.zeros((num_rows, len(self.chars)))
        for row, ch in enumerate(C):
            column = self.char_indices[ch]
            one_hot[row, column] = 1
        return one_hot

    def decode(self, x, calc_argmax=True):
        """Turn one-hot rows (or, with ``calc_argmax=False``, indices) back into a string."""
        indices = x.argmax(axis=-1) if calc_argmax else x
        return "".join(self.indices_char[idx] for idx in indices)

In [5]:
# Encoder/decoder over the problem alphabet, shared by the cells below.
ctable = CharacterTable(chars)

### one-hot encoding map

In [6]:
# Display the index -> character mapping.
ctable.indices_char

{0: ' ',
 1: '+',
 2: '0',
 3: '1',
 4: '2',
 5: '3',
 6: '4',
 7: '5',
 8: '6',
 9: '7',
 10: '8',
 11: '9'}

## Data Generation

- addition data: 80000
- total data: 80000
- length of the sequence: 7

In [7]:
def random_operand():
    """Return a random integer parsed from a string of 1 to DIGITS random digits.

    Leading zeros are possible in the digit string, so the result ranges over
    0 .. 10**DIGITS - 1.
    """
    n_digits = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(list('0123456789')) for _ in range(n_digits)))


# (a, b) and (b, a) share one de-duplication key, so only this many distinct
# questions exist; asking for more would make the loop below spin forever.
num_operands = 10 ** DIGITS
max_unique_questions = num_operands * (num_operands + 1) // 2
if TRAINING_SIZE > max_unique_questions:
    raise ValueError(
        'TRAINING_SIZE={} exceeds the {} distinct questions available with DIGITS={}'.format(
            TRAINING_SIZE, max_unique_questions, DIGITS))

questions_with_plus = []
expected_with_plus = []
seen_with_plus = set()
print('Generating data...')
while len(questions_with_plus) < TRAINING_SIZE:
    a, b = random_operand(), random_operand()
    # Treat a+b and b+a as the same question so only one of them is kept.
    key = tuple(sorted((a, b)))
    if key in seen_with_plus:
        continue
    seen_with_plus.add(key)
    # Right-pad the query with spaces to the fixed input length MAXLEN.
    q = '{}+{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    # Right-pad the answer with spaces to DIGITS + 1 characters.
    ans = str(a + b)
    ans += ' ' * (DIGITS + 1 - len(ans))
    if REVERSE:
        query = query[::-1]
    questions_with_plus.append(query)
    expected_with_plus.append(ans)
print('Total addition questions:', len(questions_with_plus))

Generating data...
Total addition questions: 80000


### addition data sample

In [8]:
# Peek at the first five padded questions and their padded answers.
print(questions_with_plus[:5], expected_with_plus[:5])

['1+632  ', '69+0   ', '98+6   ', '1+3    ', '7+16   '] ['633 ', '69  ', '104 ', '4   ', '23  ']


In [9]:
# The addition questions and answers are used as the full dataset.
questions = questions_with_plus
expected = expected_with_plus

## Processing

### Convert data to one-hot representation

In [10]:
print('Vectorization...')
# Builtin `bool` replaces the `np.bool` alias, which NumPy deprecated in 1.20
# and removed in 1.24; the resulting dtype is the same.
# x: (num_samples, MAXLEN, alphabet size); y: (num_samples, DIGITS + 1, alphabet size).
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(expected), DIGITS + 1, len(chars)), dtype=bool)
# Fill each sample with the one-hot encoding of its padded string.
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


### Split data into training, validation, testing

In [11]:
# Shuffle inputs and labels with one shared permutation so pairs stay aligned.
indices = np.arange(len(y))
np.random.shuffle(indices)
x = x[indices]
y = y[indices]

# train_test_split: the first NUM_TRAIN_VAL shuffled samples are used for
# training + validation; everything after them is the test set.
NUM_TRAIN_VAL = 20000
train_x, test_x = x[:NUM_TRAIN_VAL], x[NUM_TRAIN_VAL:]
train_y, test_y = y[:NUM_TRAIN_VAL], y[NUM_TRAIN_VAL:]

# Hold out the last tenth of the training portion for validation.
split_at = len(train_x) - len(train_x) // 10
(x_train, x_val) = train_x[:split_at], train_x[split_at:]
(y_train, y_val) = train_y[:split_at], train_y[split_at:]

# The three splits must partition the shuffled data exactly.
assert len(x_train) + len(x_val) + len(test_x) == len(x)
assert len(y_train) + len(y_val) + len(test_y) == len(y)

for split_title, split_inputs, split_labels in (
        ('Training Data:', x_train, y_train),
        ('Validation Data:', x_val, y_val),
        ('Testing Data:', test_x, test_y)):
    print(split_title)
    print(split_inputs.shape)
    print(split_labels.shape)

Training Data:
(18000, 7, 12)
(18000, 4, 12)
Validation Data:
(2000, 7, 12)
(2000, 4, 12)
Testing Data:
(60000, 7, 12)
(60000, 4, 12)


In [12]:
# Show the raw one-hot arrays of the first three training samples.
print("input: ", x_train[:3], '\n\n', "label: ", y_train[:3])

input:  [[[False False False  True False False False False False False False
   False]
  [False False False False  True False False False False False False
   False]
  [False False False False False False  True False False False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False  True False False False False False False False
   False]
  [False False False False False False False False False False False
    True]
  [ True False False False False False False False False False False
   False]]

 [[False False False False  True False False False False False False
   False]
  [False False False False False False False False False False  True
   False]
  [False False False False False False False False  True False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False  True False False False False False False
   False]
  [False False False False False False Fal

## Build Model

In [13]:
def get_model():
    """Build and compile the sequence-to-sequence addition model.

    The network reads a (MAXLEN, len(chars)) one-hot query with one recurrent
    layer, repeats the resulting vector DIGITS + 1 times, runs LAYERS recurrent
    layers that return full sequences, and applies a softmax over the alphabet
    at every output position.

    Returns:
        The compiled Sequential model (categorical cross-entropy loss, Adam
        optimizer, accuracy metric).
    """
    vocab_size = len(chars)
    answer_len = DIGITS + 1

    net = Sequential()
    # Encoder: consume the whole query and keep only the final output vector.
    net.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, vocab_size)))
    # Feed that vector to the decoder once per answer position.
    net.add(layers.RepeatVector(answer_len))
    # Decoder: every layer emits an output at each answer position.
    for _layer in range(LAYERS):
        net.add(RNN(HIDDEN_SIZE, return_sequences=True))
    # Per-position distribution over the alphabet.
    char_classifier = layers.Dense(vocab_size, activation='softmax')
    net.add(layers.TimeDistributed(char_classifier))
    net.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])
    return net

In [14]:
print('Build model...')
model = get_model()
# Print the layer-by-layer summary of the compiled model.
model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               72192     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 12)             1548      
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


### Training

In [15]:
# Train one epoch per iteration so predictions can be spot-checked in between.
for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    # Show the model's answer for ten randomly chosen validation samples.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Index with a one-element array to keep the leading batch axis.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # `Sequential.predict_classes` was removed in TensorFlow 2.6; taking the
        # argmax of `predict` over the last axis is what it computed for
        # multi-class outputs and works on old and new Keras alike.
        preds = model.predict(rowx, verbose=0).argmax(axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        # preds already holds class indices, so skip the argmax inside decode.
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 0
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 170+442 T 612  [91m☒[0m 111 
Q 966+40  T 1006 [91m☒[0m 159 
Q 227+96  T 323  [91m☒[0m 159 
Q 632+86  T 718  [91m☒[0m 159 
Q 241+38  T 279  [91m☒[0m 159 
Q 97+374  T 471  [91m☒[0m 111 
Q 240+6   T 246  [91m☒[0m 15  
Q 491+35  T 526  [91m☒[0m 159 
Q 489+98  T 587  [91m☒[0m 110 
Q 515+123 T 638  [91m☒[0m 111 

--------------------------------------------------
Iteration 1
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 36+864  T 900  [91m☒[0m 129 
Q 144+28  T 172  [91m☒[0m 129 
Q 8+490   T 498  [91m☒[0m 129 
Q 6+523   T 529  [91m☒[0m 82  
Q 573+291 T 864  [91m☒[0m 111 
Q 620+164 T 784  [91m☒[0m 121 
Q 988+923 T 1911 [91m☒[0m 1111
Q 56+859  T 915  [91m☒[0m 109 
Q 520+799 T 1319 [91m☒[0m 111 
Q 499+492 T 991  [91m☒[0m 1111

--------------------------------------------------
Iteration 2
Train on 18000 samples, valida

Q 8+378   T 386  [91m☒[0m 485 
Q 928+69  T 997  [92m☑[0m 997 
Q 947+758 T 1705 [91m☒[0m 1642
Q 573+291 T 864  [91m☒[0m 791 
Q 0+293   T 293  [91m☒[0m 299 
Q 72+280  T 352  [91m☒[0m 355 
Q 424+2   T 426  [91m☒[0m 455 
Q 113+891 T 1004 [91m☒[0m 100 
Q 817+774 T 1591 [91m☒[0m 1597
Q 451+4   T 455  [91m☒[0m 457 

--------------------------------------------------
Iteration 15
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 328+302 T 630  [91m☒[0m 666 
Q 462+47  T 509  [91m☒[0m 508 
Q 97+714  T 811  [91m☒[0m 881 
Q 24+41   T 65   [91m☒[0m 66  
Q 565+0   T 565  [92m☑[0m 565 
Q 284+77  T 361  [91m☒[0m 368 
Q 863+2   T 865  [91m☒[0m 861 
Q 849+435 T 1284 [91m☒[0m 1310
Q 887+240 T 1127 [91m☒[0m 1100
Q 9+595   T 604  [91m☒[0m 698 

--------------------------------------------------
Iteration 16
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 737+480 T 1217 [91m☒[0m 1272
Q 900+893 T 1793 [91m☒[0m 1888
Q 894+47  T 941  [91m☒[0

Q 677+19  T 696  [91m☒[0m 785 
Q 844+110 T 954  [91m☒[0m 955 
Q 55+726  T 781  [92m☑[0m 781 
Q 5+854   T 859  [91m☒[0m 858 
Q 688+752 T 1440 [91m☒[0m 1433
Q 99+784  T 883  [92m☑[0m 883 
Q 451+46  T 497  [91m☒[0m 487 
Q 8+203   T 211  [91m☒[0m 210 
Q 108+67  T 175  [91m☒[0m 174 
Q 837+443 T 1280 [92m☑[0m 1280

--------------------------------------------------
Iteration 29
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 65+704  T 769  [91m☒[0m 778 
Q 527+89  T 616  [92m☑[0m 616 
Q 7+654   T 661  [92m☑[0m 661 
Q 55+726  T 781  [92m☑[0m 781 
Q 686+762 T 1448 [91m☒[0m 1446
Q 612+32  T 644  [92m☑[0m 644 
Q 635+784 T 1419 [91m☒[0m 1418
Q 544+325 T 869  [91m☒[0m 868 
Q 183+10  T 193  [91m☒[0m 194 
Q 442+93  T 535  [92m☑[0m 535 

--------------------------------------------------
Iteration 30
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 451+46  T 497  [91m☒[0m 496 
Q 5+59    T 64   [91m☒[0m 65  
Q 58+512  T 570  [92m☑[0

Q 624+81  T 705  [92m☑[0m 705 
Q 17+520  T 537  [92m☑[0m 537 
Q 189+48  T 237  [92m☑[0m 237 
Q 564+331 T 895  [92m☑[0m 895 
Q 872+62  T 934  [92m☑[0m 934 
Q 78+361  T 439  [92m☑[0m 439 
Q 12+337  T 349  [91m☒[0m 340 
Q 988+32  T 1020 [91m☒[0m 1010
Q 651+608 T 1259 [91m☒[0m 1269
Q 494+65  T 559  [92m☑[0m 559 

--------------------------------------------------
Iteration 43
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 995+532 T 1527 [92m☑[0m 1527
Q 1+47    T 48   [92m☑[0m 48  
Q 549+439 T 988  [91m☒[0m 977 
Q 284+77  T 361  [92m☑[0m 361 
Q 957+888 T 1845 [92m☑[0m 1845
Q 63+831  T 894  [92m☑[0m 894 
Q 646+476 T 1122 [92m☑[0m 1122
Q 35+961  T 996  [92m☑[0m 996 
Q 896+60  T 956  [92m☑[0m 956 
Q 6+225   T 231  [92m☑[0m 231 

--------------------------------------------------
Iteration 44
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 399+631 T 1030 [92m☑[0m 1030
Q 18+80   T 98   [91m☒[0m 90  
Q 90+198  T 288  [92m☑[0

Q 885+638 T 1523 [92m☑[0m 1523
Q 988+32  T 1020 [92m☑[0m 1020
Q 277+26  T 303  [92m☑[0m 303 
Q 242+530 T 772  [92m☑[0m 772 
Q 836+110 T 946  [91m☒[0m 945 
Q 18+560  T 578  [92m☑[0m 578 
Q 141+16  T 157  [92m☑[0m 157 
Q 65+597  T 662  [92m☑[0m 662 
Q 619+567 T 1186 [92m☑[0m 1186
Q 891+902 T 1793 [92m☑[0m 1793

--------------------------------------------------
Iteration 57
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 346+27  T 373  [92m☑[0m 373 
Q 211+858 T 1069 [92m☑[0m 1069
Q 491+89  T 580  [92m☑[0m 580 
Q 914+392 T 1306 [91m☒[0m 1206
Q 753+643 T 1396 [92m☑[0m 1396
Q 991+405 T 1396 [91m☒[0m 1496
Q 73+37   T 110  [92m☑[0m 110 
Q 29+131  T 160  [92m☑[0m 160 
Q 756+414 T 1170 [92m☑[0m 1170
Q 120+374 T 494  [91m☒[0m 594 

--------------------------------------------------
Iteration 58
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 768+918 T 1686 [92m☑[0m 1686
Q 871+34  T 905  [92m☑[0m 905 
Q 0+293   T 293  [92m☑[0

Q 4+759   T 763  [92m☑[0m 763 
Q 220+740 T 960  [92m☑[0m 960 
Q 1+608   T 609  [91m☒[0m 619 
Q 5+156   T 161  [92m☑[0m 161 
Q 353+975 T 1328 [92m☑[0m 1328
Q 379+42  T 421  [92m☑[0m 421 
Q 820+344 T 1164 [92m☑[0m 1164
Q 916+49  T 965  [92m☑[0m 965 
Q 41+868  T 909  [91m☒[0m 919 
Q 912+513 T 1425 [92m☑[0m 1425

--------------------------------------------------
Iteration 71
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 194+196 T 390  [91m☒[0m 380 
Q 822+75  T 897  [92m☑[0m 897 
Q 50+404  T 454  [92m☑[0m 454 
Q 982+28  T 1010 [91m☒[0m 1000
Q 945+7   T 952  [91m☒[0m 951 
Q 174+906 T 1080 [92m☑[0m 1080
Q 205+633 T 838  [91m☒[0m 848 
Q 116+32  T 148  [92m☑[0m 148 
Q 52+143  T 195  [92m☑[0m 195 
Q 177+728 T 905  [92m☑[0m 905 

--------------------------------------------------
Iteration 72
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 79+635  T 714  [92m☑[0m 714 
Q 57+74   T 131  [92m☑[0m 131 
Q 18+560  T 578  [92m☑[0

Q 766+887 T 1653 [92m☑[0m 1653
Q 520+799 T 1319 [91m☒[0m 1329
Q 86+222  T 308  [92m☑[0m 308 
Q 8+490   T 498  [92m☑[0m 498 
Q 68+154  T 222  [92m☑[0m 222 
Q 363+955 T 1318 [92m☑[0m 1318
Q 280+198 T 478  [92m☑[0m 478 
Q 264+78  T 342  [92m☑[0m 342 
Q 1+284   T 285  [92m☑[0m 285 
Q 652+943 T 1595 [92m☑[0m 1595

--------------------------------------------------
Iteration 85
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 32+448  T 480  [92m☑[0m 480 
Q 183+372 T 555  [92m☑[0m 555 
Q 163+625 T 788  [92m☑[0m 788 
Q 9+803   T 812  [92m☑[0m 812 
Q 635+43  T 678  [92m☑[0m 678 
Q 444+4   T 448  [92m☑[0m 448 
Q 194+195 T 389  [92m☑[0m 389 
Q 664+14  T 678  [92m☑[0m 678 
Q 16+96   T 112  [92m☑[0m 112 
Q 10+925  T 935  [92m☑[0m 935 

--------------------------------------------------
Iteration 86
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 646+730 T 1376 [92m☑[0m 1376
Q 216+948 T 1164 [92m☑[0m 1164
Q 900+209 T 1109 [92m☑[0

Q 21+622  T 643  [92m☑[0m 643 
Q 9+36    T 45   [91m☒[0m 46  
Q 82+256  T 338  [92m☑[0m 338 
Q 28+107  T 135  [92m☑[0m 135 
Q 43+533  T 576  [92m☑[0m 576 
Q 48+454  T 502  [92m☑[0m 502 
Q 77+705  T 782  [92m☑[0m 782 
Q 337+37  T 374  [92m☑[0m 374 
Q 99+131  T 230  [92m☑[0m 230 
Q 5+59    T 64   [92m☑[0m 64  

--------------------------------------------------
Iteration 99
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 73+204  T 277  [92m☑[0m 277 
Q 218+44  T 262  [92m☑[0m 262 
Q 433+22  T 455  [92m☑[0m 455 
Q 52+300  T 352  [92m☑[0m 352 
Q 900+733 T 1633 [92m☑[0m 1633
Q 37+724  T 761  [92m☑[0m 761 
Q 654+629 T 1283 [92m☑[0m 1283
Q 852+891 T 1743 [92m☑[0m 1743
Q 609+423 T 1032 [92m☑[0m 1032
Q 250+16  T 266  [92m☑[0m 266 


### Testing

In [16]:
# Evaluate on the test split; the result unpacks into the loss and the accuracy metric.
# NOTE: `evaludated_accuracy` is a misspelling of "evaluated"; the name is kept
# because the following cell reads it under this spelling.
evaluated_loss, evaludated_accuracy = model.evaluate(x=test_x, y=test_y)



In [17]:
# Report the loss and accuracy measured on the test split.
print("testing loss:{}, testing accuracy:{}".format(evaluated_loss, evaludated_accuracy))

testing loss:0.046445643948142726, testing accuracy:0.984575
