In [1]:
import numpy as np
from keras.models import Model, Sequential
from keras.layers import Input, LSTM, Dense, RNN
from keras import layers

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
class colors:
    """ANSI escape sequences used to color the pass/fail marks printed during training."""
    # SGR 92: bright green foreground on most terminals (used for correct guesses).
    ok = '\033[92m'
    # SGR 91: bright red foreground on most terminals (used for wrong guesses).
    fail = '\033[91m'
    # SGR 0: reset all attributes; emitted after each colored mark.
    close = '\033[0m'

In [3]:
# Number of questions generated per operation (addition and subtraction each).
TRAINING_SIZE = 80000
# Maximum number of digits in each operand.
DIGITS = 3
# When True, question strings are reversed before being stored.
REVERSE = False
# Longest possible question: DIGITS digits, one operator character, DIGITS digits.
MAXLEN = DIGITS + 1 + DIGITS
# Vocabulary: the ten digits, the two operators, and the space used as padding.
chars = '0123456789+- '
# Recurrent layer class instantiated by get_model().
# NOTE(review): this rebinds the `RNN` name imported from keras.layers in the
# imports cell; from here on `RNN` means layers.LSTM.
RNN = layers.LSTM
# Number of units in each recurrent layer.
HIDDEN_SIZE = 128
# Mini-batch size passed to model.fit().
BATCH_SIZE = 256
# Number of recurrent layers stacked after the RepeatVector in get_model().
LAYERS = 1

### one-hot encoding converter

In [4]:
class CharacterTable(object):
    """Two-way lookup between characters and one-hot encoded rows.

    Attributes:
        chars: sorted list of the distinct characters in the vocabulary.
        char_indices: dict mapping each character to its column index.
        indices_char: dict mapping each column index back to its character.
    """

    def __init__(self, chars):
        """Build both lookup tables from `chars` (duplicates dropped, then sorted)."""
        self.chars = sorted(set(chars))
        self.char_indices = {}
        self.indices_char = {}
        for index, char in enumerate(self.chars):
            self.char_indices[char] = index
            self.indices_char[index] = char

    def encode(self, C, num_rows):
        """One-hot encode the string `C` into a (num_rows, len(self.chars)) float array.

        Row i has a single 1 in the column of C[i]; rows past len(C) stay all zero.
        Raises KeyError for a character outside the vocabulary.
        """
        one_hot = np.zeros((num_rows, len(self.chars)))
        for row, char in enumerate(C):
            column = self.char_indices[char]
            one_hot[row, column] = 1
        return one_hot

    def decode(self, x, calc_argmax=True):
        """Turn `x` back into a string.

        With calc_argmax=True, `x` holds one row of scores per character and the
        argmax over the last axis picks each index; otherwise `x` is already a
        sequence of character indices.
        """
        indices = x.argmax(axis=-1) if calc_argmax else x
        return "".join([self.indices_char[index] for index in indices])

In [5]:
# Single encoder/decoder instance built from the `chars` vocabulary; reused by every later cell.
ctable = CharacterTable(chars)

### one-hot encoding map

In [6]:
# Display the index -> character table, i.e. the column order of the one-hot encoding.
ctable.indices_char

{0: ' ',
 1: '+',
 2: '-',
 3: '0',
 4: '1',
 5: '2',
 6: '3',
 7: '4',
 8: '5',
 9: '6',
 10: '7',
 11: '8',
 12: '9'}

## Data Generation

- addition data: 80000
- subtraction data: 80000
- total data: 160000
- length of the sequence: 7

In [7]:
# Generate TRAINING_SIZE distinct addition problems as fixed-width strings.
questions_with_plus = []
expected_with_plus = []
seen_with_plus = set()


def f():
    """Return a random integer spelled with 1 to DIGITS random decimal digits.

    Leading zeros are possible in the sampled digits and are dropped by int().
    """
    n_digits = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(list('0123456789')) for _ in range(n_digits)))


print('Generating data...')
# Duplicates are redrawn, so this loop only ends if at least TRAINING_SIZE
# distinct operand pairs exist for the configured DIGITS.
while len(questions_with_plus) < TRAINING_SIZE:
    a, b = f(), f()
    # a+b and b+a are treated as the same problem: de-duplicate on the sorted pair.
    key = tuple(sorted((a, b)))
    if key in seen_with_plus:
        continue
    seen_with_plus.add(key)
    # Right-pad the question to MAXLEN characters with spaces.
    q = '{}+{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    # The sum has at most DIGITS + 1 digits; right-pad the answer to that width.
    ans = str(a + b)
    ans += ' ' * (DIGITS + 1 - len(ans))
    if REVERSE:
        query = query[::-1]
    questions_with_plus.append(query)
    expected_with_plus.append(ans)
print('Total addition questions:', len(questions_with_plus))

Generating data...
Total addition questions: 80000


### addition data sample

In [8]:
# Peek at the first five padded addition questions and their padded answers.
print(questions_with_plus[:5], expected_with_plus[:5])

['7+0    ', '7+4    ', '3+0    ', '965+350', '7+779  '] ['7   ', '11  ', '3   ', '1315', '786 ']


In [9]:
# Generate TRAINING_SIZE distinct subtraction problems as fixed-width strings.
questions_with_minus = []
expected_with_minus = []
seen_with_minus = set()


def f():
    """Return a random integer spelled with 1 to DIGITS random decimal digits.

    Leading zeros are possible in the sampled digits and are dropped by int().
    """
    n_digits = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(list('0123456789')) for _ in range(n_digits)))


print('Generating data...')
# Rejected and duplicate draws are redrawn, so this loop only ends if at least
# TRAINING_SIZE distinct pairs with a >= b exist for the configured DIGITS.
while len(questions_with_minus) < TRAINING_SIZE:
    a, b = f(), f()
    # Keep only problems with a non-negative result; reject before doing any
    # further work on the pair.
    if a < b:
        continue
    key = tuple(sorted((a, b)))
    if key in seen_with_minus:
        continue
    seen_with_minus.add(key)
    # Right-pad the question to MAXLEN characters with spaces.
    q = '{}-{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    # Right-pad the answer to DIGITS + 1 characters, matching the addition answers.
    ans = str(a - b)
    ans += ' ' * (DIGITS + 1 - len(ans))
    if REVERSE:
        query = query[::-1]
    questions_with_minus.append(query)
    expected_with_minus.append(ans)
print('Total subtraction questions:', len(questions_with_minus))

Generating data...
Total subtraction questions: 80000


### subtraction data sample

In [10]:
# Peek at the first five padded subtraction questions and their padded answers.
print(questions_with_minus[:5], expected_with_minus[:5])

['47-5   ', '58-9   ', '815-1  ', '89-20  ', '901-843'] ['42  ', '49  ', '814 ', '69  ', '58  ']


### combine addition & subtraction data

In [11]:
# Concatenate both datasets: addition samples first, subtraction samples after.
# questions[i] and expected[i] stay paired because both lists use the same order.
questions = questions_with_plus + questions_with_minus
expected = expected_with_plus + expected_with_minus

## Processing

### transfer data to one-hot representation

In [12]:
print('Vectorization...')
# One-hot tensors: x is (samples, MAXLEN, vocab), y is (samples, DIGITS + 1, vocab).
# The builtin `bool` is used as dtype: `np.bool` was just an alias for it,
# deprecated in NumPy 1.20 and removed in 1.24 (where it raises AttributeError).
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(expected), DIGITS + 1, len(chars)), dtype=bool)
# encode() returns a float array of 0/1 values; assignment casts it to bool.
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


### Split data into training, validation, testing

In [13]:
# Shuffle questions and answers with one shared permutation so pairs stay aligned.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

# train_test_split: the first 40000 shuffled samples are used for
# training + validation, everything after that is held out for testing.
train_x, test_x = x[:40000], x[40000:]
train_y, test_y = y[:40000], y[40000:]

# Reserve the last 10% of the training portion for validation.
split_at = len(train_x) - len(train_x) // 10
x_train, x_val = train_x[:split_at], train_x[split_at:]
y_train, y_val = train_y[:split_at], train_y[split_at:]

# Report the shapes of every split (inputs first, then labels).
for title, features, labels in (
        ('Training Data:', x_train, y_train),
        ('Validation Data:', x_val, y_val),
        ('Testing Data:', test_x, test_y)):
    print(title)
    print(features.shape)
    print(labels.shape)

Training Data:
(36000, 7, 13)
(36000, 4, 13)
Validation Data:
(4000, 7, 13)
(4000, 4, 13)
Testing Data:
(120000, 7, 13)
(120000, 4, 13)


In [14]:
# Dump the first three one-hot encoded training inputs and labels.
# NOTE(review): this prints full boolean arrays; the output is long.
print("input: ", x_train[:3], '\n\n', "label: ", y_train[:3])

input:  [[[False False False False False False False  True False False False
   False False]
  [False False False False False False False False False False  True
   False False]
  [False False False  True False False False False False False False
   False False]
  [False False  True False False False False False False False False
   False False]
  [False False False False False False False False False  True False
   False False]
  [ True False False False False False False False False False False
   False False]
  [ True False False False False False False False False False False
   False False]]

 [[False False False False False  True False False False False False
   False False]
  [False False False False False False False False  True False False
   False False]
  [False False False False False False False False False  True False
   False False]
  [False False  True False False False False False False False False
   False False]
  [False False False False False  True False False Fals

## Build Model

In [15]:
def get_model():
    """Build and compile the sequence-to-sequence arithmetic model.

    Architecture, in order:
      1. an encoder `RNN` layer that reads the (MAXLEN, len(chars)) one-hot
         question and outputs a single HIDDEN_SIZE vector;
      2. a RepeatVector that copies that vector DIGITS + 1 times, once per
         answer position;
      3. LAYERS decoder `RNN` layers that return the full sequence;
      4. a time-distributed softmax over the vocabulary for each position.

    `RNN` is the module-level layer class chosen in the configuration cell.

    Returns:
        The compiled Sequential model (categorical cross-entropy loss, Adam
        optimizer, accuracy metric).
    """
    vocab_size = len(chars)
    answer_len = DIGITS + 1

    seq2seq = Sequential()
    # Encoder: question sequence -> one fixed-size vector.
    seq2seq.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, vocab_size)))
    # Present the encoded vector once for every output character.
    seq2seq.add(layers.RepeatVector(answer_len))
    # Decoder stack: return_sequences=True keeps an output for every time step.
    for _ in range(LAYERS):
        seq2seq.add(RNN(HIDDEN_SIZE, return_sequences=True))
    # Per-position classifier over the character vocabulary.
    char_classifier = layers.Dense(vocab_size, activation='softmax')
    seq2seq.add(layers.TimeDistributed(char_classifier))

    seq2seq.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return seq2seq

In [16]:
print('Build model...')
# Instantiate the compiled network and print its layer / parameter summary.
model = get_model()
model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               72704     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 13)             1677      
Total params: 205,965
Trainable params: 205,965
Non-trainable params: 0
_________________________________________________________________


### Training

In [33]:
# Train one epoch at a time so predictions can be spot-checked after each epoch.
for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    # Show the model's answer for 10 randomly chosen validation questions.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Index with a length-1 array to keep the leading batch dimension.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # argmax over the softmax axis gives the predicted character index per
        # position. This is what Sequential.predict_classes() computed; that
        # method is deprecated and absent from newer Keras releases.
        preds = model.predict(rowx, verbose=0).argmax(axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        # preds already holds class indices, so skip the argmax inside decode().
        guess = ctable.decode(preds[0], calc_argmax=False)
        # Questions are stored reversed when REVERSE is set; undo that for display.
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 0
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 394-242 T 152  [92m☑[0m 152 
Q 364-42  T 322  [92m☑[0m 322 
Q 51+116  T 167  [92m☑[0m 167 
Q 555-451 T 104  [92m☑[0m 104 
Q 88+797  T 885  [92m☑[0m 885 
Q 421+490 T 911  [92m☑[0m 911 
Q 128-2   T 126  [92m☑[0m 126 
Q 413+195 T 608  [92m☑[0m 608 
Q 613-4   T 609  [92m☑[0m 609 
Q 56+298  T 354  [92m☑[0m 354 

--------------------------------------------------
Iteration 1
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 863-7   T 856  [92m☑[0m 856 
Q 819-71  T 748  [92m☑[0m 748 
Q 170-116 T 54   [91m☒[0m 53  
Q 889-99  T 790  [92m☑[0m 790 
Q 343-72  T 271  [92m☑[0m 271 
Q 141+16  T 157  [92m☑[0m 157 
Q 879-13  T 866  [92m☑[0m 866 
Q 916-48  T 868  [92m☑[0m 868 
Q 317+282 T 599  [92m☑[0m 599 
Q 490-356 T 134  [92m☑[0m 134 

--------------------------------------------------
Iteration 2
Train on 36000 samples, valida

Q 852-82  T 770  [92m☑[0m 770 
Q 784+26  T 810  [92m☑[0m 810 
Q 243+91  T 334  [92m☑[0m 334 
Q 456-128 T 328  [92m☑[0m 328 
Q 859+83  T 942  [92m☑[0m 942 
Q 689+55  T 744  [92m☑[0m 744 
Q 944+631 T 1575 [92m☑[0m 1575
Q 667-28  T 639  [92m☑[0m 639 
Q 451-243 T 208  [92m☑[0m 208 
Q 469+2   T 471  [92m☑[0m 471 

--------------------------------------------------
Iteration 15
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 36+822  T 858  [92m☑[0m 858 
Q 318+986 T 1304 [92m☑[0m 1304
Q 323-7   T 316  [92m☑[0m 316 
Q 884+65  T 949  [92m☑[0m 949 
Q 956-635 T 321  [92m☑[0m 321 
Q 905-90  T 815  [92m☑[0m 815 
Q 710-378 T 332  [92m☑[0m 332 
Q 404-81  T 323  [92m☑[0m 323 
Q 145+956 T 1101 [92m☑[0m 1101
Q 882-9   T 873  [92m☑[0m 873 

--------------------------------------------------
Iteration 16
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 888-15  T 873  [92m☑[0m 873 
Q 253+603 T 856  [92m☑[0m 856 
Q 603+321 T 924  [92m☑[0

Q 823+12  T 835  [92m☑[0m 835 
Q 650+3   T 653  [92m☑[0m 653 
Q 288-57  T 231  [92m☑[0m 231 
Q 9+564   T 573  [92m☑[0m 573 
Q 25+75   T 100  [91m☒[0m 90  
Q 718+342 T 1060 [92m☑[0m 1060
Q 75+842  T 917  [92m☑[0m 917 
Q 909-606 T 303  [92m☑[0m 303 
Q 436-0   T 436  [92m☑[0m 436 
Q 863+33  T 896  [92m☑[0m 896 

--------------------------------------------------
Iteration 29
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 943+70  T 1013 [92m☑[0m 1013
Q 761-0   T 761  [92m☑[0m 761 
Q 723+3   T 726  [92m☑[0m 726 
Q 66+233  T 299  [92m☑[0m 299 
Q 997-741 T 256  [92m☑[0m 256 
Q 733-91  T 642  [92m☑[0m 642 
Q 209-25  T 184  [92m☑[0m 184 
Q 873-820 T 53   [92m☑[0m 53  
Q 71-29   T 42   [92m☑[0m 42  
Q 302-65  T 237  [92m☑[0m 237 

--------------------------------------------------
Iteration 30
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 47+80   T 127  [92m☑[0m 127 
Q 65+200  T 265  [92m☑[0m 265 
Q 243+14  T 257  [92m☑[0

Q 557-57  T 500  [92m☑[0m 500 
Q 386+63  T 449  [92m☑[0m 449 
Q 72+575  T 647  [92m☑[0m 647 
Q 85+573  T 658  [92m☑[0m 658 
Q 131+17  T 148  [92m☑[0m 148 
Q 25+54   T 79   [91m☒[0m 89  
Q 9+777   T 786  [92m☑[0m 786 
Q 637-513 T 124  [92m☑[0m 124 
Q 76-63   T 13   [92m☑[0m 13  
Q 26+173  T 199  [91m☒[0m 299 

--------------------------------------------------
Iteration 43
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 148+417 T 565  [92m☑[0m 565 
Q 49-2    T 47   [92m☑[0m 47  
Q 715+20  T 735  [92m☑[0m 735 
Q 650-629 T 21   [91m☒[0m 10  
Q 668+385 T 1053 [92m☑[0m 1053
Q 251+681 T 932  [92m☑[0m 932 
Q 225+307 T 532  [91m☒[0m 533 
Q 398-297 T 101  [91m☒[0m 111 
Q 572-81  T 491  [92m☑[0m 491 
Q 1+58    T 59   [92m☑[0m 59  

--------------------------------------------------
Iteration 44
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 34-6    T 28   [92m☑[0m 28  
Q 456+253 T 709  [92m☑[0m 709 
Q 16+18   T 34   [92m☑[0

Q 446+47  T 493  [92m☑[0m 493 
Q 850+813 T 1663 [92m☑[0m 1663
Q 320-25  T 295  [92m☑[0m 295 
Q 63-33   T 30   [92m☑[0m 30  
Q 81-11   T 70   [91m☒[0m 60  
Q 229-9   T 220  [92m☑[0m 220 
Q 73+830  T 903  [92m☑[0m 903 
Q 139-124 T 15   [91m☒[0m 25  
Q 494+79  T 573  [92m☑[0m 573 
Q 80+54   T 134  [92m☑[0m 134 

--------------------------------------------------
Iteration 57
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 374+118 T 492  [92m☑[0m 492 
Q 61+12   T 73   [92m☑[0m 73  
Q 617-394 T 223  [92m☑[0m 223 
Q 127-8   T 119  [92m☑[0m 119 
Q 372-59  T 313  [92m☑[0m 313 
Q 451-243 T 208  [92m☑[0m 208 
Q 794-743 T 51   [92m☑[0m 51  
Q 54+346  T 400  [92m☑[0m 400 
Q 31+252  T 283  [92m☑[0m 283 
Q 452-448 T 4    [91m☒[0m 5   

--------------------------------------------------
Iteration 58
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 531+973 T 1504 [92m☑[0m 1504
Q 729-58  T 671  [92m☑[0m 671 
Q 715+694 T 1409 [92m☑[0

Q 531-54  T 477  [92m☑[0m 477 
Q 484+42  T 526  [92m☑[0m 526 
Q 967-603 T 364  [92m☑[0m 364 
Q 86-31   T 55   [92m☑[0m 55  
Q 652-194 T 458  [92m☑[0m 458 
Q 301-85  T 216  [92m☑[0m 216 
Q 490-356 T 134  [92m☑[0m 134 
Q 68+939  T 1007 [92m☑[0m 1007
Q 521+25  T 546  [92m☑[0m 546 
Q 79+390  T 469  [92m☑[0m 469 

--------------------------------------------------
Iteration 71
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 948-423 T 525  [92m☑[0m 525 
Q 898-94  T 804  [92m☑[0m 804 
Q 64+83   T 147  [92m☑[0m 147 
Q 554-95  T 459  [92m☑[0m 459 
Q 939+193 T 1132 [92m☑[0m 1132
Q 556-361 T 195  [92m☑[0m 195 
Q 64+559  T 623  [92m☑[0m 623 
Q 185-77  T 108  [92m☑[0m 108 
Q 352-5   T 347  [92m☑[0m 347 
Q 675+570 T 1245 [92m☑[0m 1245

--------------------------------------------------
Iteration 72
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 44+963  T 1007 [92m☑[0m 1007
Q 307-97  T 210  [92m☑[0m 210 
Q 967-21  T 946  [92m☑[0

Q 668+143 T 811  [92m☑[0m 811 
Q 744+12  T 756  [92m☑[0m 756 
Q 993+629 T 1622 [92m☑[0m 1622
Q 148-2   T 146  [92m☑[0m 146 
Q 34+774  T 808  [92m☑[0m 808 
Q 643-76  T 567  [92m☑[0m 567 
Q 105-68  T 37   [92m☑[0m 37  
Q 216+14  T 230  [92m☑[0m 230 
Q 741-3   T 738  [92m☑[0m 738 
Q 81+514  T 595  [92m☑[0m 595 

--------------------------------------------------
Iteration 85
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 152-50  T 102  [92m☑[0m 102 
Q 436-81  T 355  [92m☑[0m 355 
Q 78+62   T 140  [92m☑[0m 140 
Q 510-206 T 304  [91m☒[0m 303 
Q 669-662 T 7    [91m☒[0m 6   
Q 491+51  T 542  [92m☑[0m 542 
Q 86+503  T 589  [91m☒[0m 599 
Q 773+40  T 813  [92m☑[0m 813 
Q 634+37  T 671  [92m☑[0m 671 
Q 626+9   T 635  [92m☑[0m 635 

--------------------------------------------------
Iteration 86
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 997-7   T 990  [92m☑[0m 990 
Q 8+9     T 17   [91m☒[0m 16  
Q 277-38  T 239  [92m☑[0

Q 979-5   T 974  [92m☑[0m 974 
Q 555-155 T 400  [92m☑[0m 400 
Q 439-134 T 305  [92m☑[0m 305 
Q 261-93  T 168  [92m☑[0m 168 
Q 901-231 T 670  [92m☑[0m 670 
Q 255+343 T 598  [91m☒[0m 698 
Q 81+281  T 362  [92m☑[0m 362 
Q 212+545 T 757  [92m☑[0m 757 
Q 723+3   T 726  [92m☑[0m 726 
Q 239+51  T 290  [92m☑[0m 290 

--------------------------------------------------
Iteration 99
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 860-457 T 403  [92m☑[0m 403 
Q 547+351 T 898  [92m☑[0m 898 
Q 945-284 T 661  [92m☑[0m 661 
Q 329-85  T 244  [92m☑[0m 244 
Q 369+503 T 872  [92m☑[0m 872 
Q 985-768 T 217  [92m☑[0m 217 
Q 872-219 T 653  [92m☑[0m 653 
Q 918-25  T 893  [92m☑[0m 893 
Q 79+390  T 469  [92m☑[0m 469 
Q 599+791 T 1390 [91m☒[0m 1380


In [34]:
# Persist the trained weights to an HDF5 file in the working directory.
model.save_weights('layer_1.h5')
# To restore them into a model built by get_model():
# model.load_weights('layer_1.h5')

### Testing

In [35]:
# Score the model on the held-out test split. The model is compiled with
# metrics=['accuracy'], so evaluate() yields the loss followed by the accuracy.
# NOTE(review): `evaludated_accuracy` is a typo for `evaluated_accuracy`; the
# print cell that follows uses the same spelling, so rename both together.
evaluated_loss, evaludated_accuracy = model.evaluate(x=test_x, y=test_y)



In [36]:
# Report the test-set loss and accuracy computed by model.evaluate() above.
print("testing loss:{}, testing accuracy:{}".format(evaluated_loss, evaludated_accuracy))

testing loss:0.05609151270463287, testing accuracy:0.9845333333333334
