In [2]:
import numpy as np
from keras.models import Model, Sequential
from keras.layers import Input, LSTM, Dense, RNN
from keras import layers

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
class colors:
    """ANSI terminal escape sequences used to colour the pass/fail marks."""
    close = '\033[0m'   # reset attributes back to the terminal default
    fail = '\033[91m'   # bright red, wraps the mark for a wrong guess
    ok = '\033[92m'     # bright green, wraps the mark for a correct guess

In [6]:
# Number of unique questions generated for EACH operation (addition, subtraction).
TRAINING_SIZE = 80000
# Maximum number of digits in each operand.
DIGITS = 3
# When True, the generation cells reverse every padded query string.
REVERSE = False
# Longest possible query: DIGITS digits + one operator character + DIGITS digits.
MAXLEN = DIGITS + 1 + DIGITS
# Vocabulary for the one-hot encoding; the trailing space is the padding character.
chars = '0123456789+- '
# NOTE: this rebinds the `RNN` name imported from keras.layers above, so every
# later `RNN(...)` call in this notebook constructs an LSTM layer.
RNN = layers.LSTM
HIDDEN_SIZE = 128  # units in each recurrent layer
BATCH_SIZE = 128   # mini-batch size passed to model.fit
LAYERS = 1         # number of stacked decoder recurrent layers

### one-hot encoding converter

In [7]:
class CharacterTable(object):
    """Two-way mapping between a character set and one-hot encoded rows.

    Attributes:
        chars: sorted list of the distinct characters in the vocabulary.
        char_indices: character -> integer index into ``chars``.
        indices_char: integer index -> character.
    """

    def __init__(self, chars):
        """Build the lookup tables from any iterable of characters."""
        self.chars = sorted(set(chars))
        self.char_indices = {ch: idx for idx, ch in enumerate(self.chars)}
        self.indices_char = dict(enumerate(self.chars))

    def encode(self, C, num_rows):
        """One-hot encode the string ``C`` into a ``(num_rows, len(chars))`` array.

        Rows beyond ``len(C)`` stay all-zero. A character outside the
        vocabulary raises ``KeyError``; a string longer than ``num_rows``
        raises ``IndexError``.
        """
        onehot = np.zeros((num_rows, len(self.chars)))
        for row, ch in enumerate(C):
            col = self.char_indices[ch]
            onehot[row, col] = 1
        return onehot

    def decode(self, x, calc_argmax=True):
        """Turn one-hot rows (or class indices) back into a string.

        When ``calc_argmax`` is true, ``x`` is reduced with ``argmax`` over its
        last axis first; otherwise ``x`` is taken to be an iterable of indices.
        """
        indices = x.argmax(axis=-1) if calc_argmax else x
        return "".join(map(self.indices_char.__getitem__, indices))

In [8]:
# Shared encoder/decoder over the notebook vocabulary `chars`.
ctable = CharacterTable(chars)

### one-hot encoding map

In [9]:
# Display the index -> character map that defines the one-hot column order.
ctable.indices_char

{0: ' ',
 1: '+',
 2: '-',
 3: '0',
 4: '1',
 5: '2',
 6: '3',
 7: '4',
 8: '5',
 9: '6',
 10: '7',
 11: '8',
 12: '9'}

## Data Generation

- addition data: 80000
- subtraction data: 80000
- total data: 160000
- length of the sequence: 7

In [10]:
def f():
    """Return a random integer built from 1 to DIGITS random decimal digits.

    Leading zeros are dropped by ``int``, so e.g. '007' yields 7.
    """
    n_digits = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(list('0123456789')) for _ in range(n_digits)))


questions_with_plus, expected_with_plus = [], []
seen_with_plus = set()
print('Generating data...')
while len(questions_with_plus) < TRAINING_SIZE:
    a, b = f(), f()
    # a+b and b+a count as the same question, so de-duplicate on the sorted pair.
    key = tuple(sorted((a, b)))
    if key not in seen_with_plus:
        seen_with_plus.add(key)
        q = '{}+{}'.format(a, b)
        # Right-pad the query to MAXLEN and the answer to DIGITS + 1 with spaces.
        query = q.ljust(MAXLEN)
        ans = str(a + b).ljust(DIGITS + 1)
        questions_with_plus.append(query[::-1] if REVERSE else query)
        expected_with_plus.append(ans)
print('Total addition questions:', len(questions_with_plus))

Generating data...
Total addition questions: 80000


### addition data sample

In [36]:
# Show the first five padded addition queries next to their padded answers.
print(questions_with_plus[:5], expected_with_plus[:5])

['28+54  ', '941+950', '681+55 ', '987+810', '4+7    '] ['82  ', '1891', '736 ', '1797', '11  ']


In [15]:
def f():
    """Return a random integer built from 1 to DIGITS random decimal digits.

    Leading zeros are dropped by ``int``, so e.g. '007' yields 7.
    """
    n_digits = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(list('0123456789')) for _ in range(n_digits)))


questions_with_minus, expected_with_minus = [], []
seen_with_minus = set()
print('Generating data...')
while len(questions_with_minus) < TRAINING_SIZE:
    a, b = f(), f()
    # Only keep a >= b so every answer is non-negative.
    if a < b:
        continue
    key = tuple(sorted((a, b)))
    if key not in seen_with_minus:
        seen_with_minus.add(key)
        q = '{}-{}'.format(a, b)
        # Right-pad the query to MAXLEN and the answer to DIGITS + 1 with spaces.
        query = q.ljust(MAXLEN)
        ans = str(a - b).ljust(DIGITS + 1)
        questions_with_minus.append(query[::-1] if REVERSE else query)
        expected_with_minus.append(ans)
print('Total subtraction questions:', len(questions_with_minus))

Generating data...
Total subtraction questions: 80000


### subtraction data sample

In [16]:
# Show the first five padded subtraction queries next to their padded answers.
print(questions_with_minus[:5], expected_with_minus[:5])

['784-99 ', '90-11  ', '753-570', '8-5    ', '866-699'] ['685 ', '79  ', '183 ', '3   ', '167 ']


### combine addition & subtraction data

In [20]:
# Concatenate both operations: addition samples first, then subtraction samples.
# The two lists stay index-aligned (questions[i] is answered by expected[i]).
questions = questions_with_plus + questions_with_minus
expected = expected_with_plus + expected_with_minus

## Processing

### convert data to one-hot representation

In [21]:
print('Vectorization...')
# One-hot tensors indexed as (sample, character position, vocabulary index).
# The builtin `bool` is used instead of the `np.bool` alias, which was
# deprecated in NumPy 1.20 and removed in NumPy 1.24 (AttributeError there).
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(expected), DIGITS + 1, len(chars)), dtype=bool)
# ctable.encode returns a float array; assigning into x / y casts it to bool.
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


### Split data into training, validation, testing

In [37]:
# Shuffle questions and answers with one shared permutation so pairs stay aligned.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

# train_test_split: the first 40000 shuffled samples form the training pool,
# everything after them is held out for testing.
train_x, test_x = x[:40000], x[40000:]
train_y, test_y = y[:40000], y[40000:]

# Reserve the last tenth of the training pool for validation.
split_at = len(train_x) - len(train_x) // 10
x_train, x_val = train_x[:split_at], train_x[split_at:]
y_train, y_val = train_y[:split_at], train_y[split_at:]

# Report the shape of every split (inputs first, then labels).
for title, features, labels in (
    ('Training Data:', x_train, y_train),
    ('Validation Data:', x_val, y_val),
    ('Testing Data:', test_x, test_y),
):
    print(title)
    print(features.shape)
    print(labels.shape)

Training Data:
(36000, 7, 13)
(36000, 4, 13)
Validation Data:
(4000, 7, 13)
(4000, 4, 13)
Testing Data:
(120000, 7, 13)
(120000, 4, 13)


In [28]:
# Dump the first three one-hot encoded training inputs and their labels.
print("input: ", x_train[:3], '\n\n', "label: ", y_train[:3])

input:  [[[False False False False False False False False False False  True
   False False]
  [False False False False False False False False  True False False
   False False]
  [False False False False False False False False False False False
    True False]
  [False False  True False False False False False False False False
   False False]
  [False False False False False False False False False  True False
   False False]
  [False False False False False False  True False False False False
   False False]
  [False False False  True False False False False False False False
   False False]]

 [[False False False False False False False False False  True False
   False False]
  [False False False False False False False False False False  True
   False False]
  [False  True False False False False False False False False False
   False False]
  [False False False False False False False False False False  True
   False False]
  [False False False False False False False False Fals

## Build Model

In [29]:
# def get_model():
#     inputs = Input(shape=(7, 12))
#     lstm1 = LSTM(units=128, name='lstm1')(inputs)
#     model = Model(inputs=[inputs], output=[lstm1])
#     model.compile(optimizer='adam', loss='mean_squared_error', metric='accuracy')
#     return model

def get_model():
    """Build and compile the encoder/decoder recurrent model.

    The architecture is driven by the module-level settings `RNN`,
    `HIDDEN_SIZE`, `LAYERS`, `MAXLEN`, `DIGITS` and `chars`.

    Returns:
        A compiled `Sequential` model mapping a one-hot query of shape
        (MAXLEN, len(chars)) to per-position softmax distributions of shape
        (DIGITS + 1, len(chars)).
    """
    vocab_size = len(chars)
    answer_len = DIGITS + 1

    # Encoder: collapse the input sequence into a single HIDDEN_SIZE vector.
    # (For variable-length inputs, input_shape=(None, num_feature) would be used.)
    stack = [RNN(HIDDEN_SIZE, input_shape=(MAXLEN, vocab_size))]

    # Feed that vector to the decoder once per output position. The answer is
    # at most DIGITS + 1 characters long, e.g. 999+999=1998 when DIGITS=3.
    stack.append(layers.RepeatVector(answer_len))

    # Decoder: one or more stacked recurrent layers. return_sequences=True keeps
    # the output of every time step, in the form (num_samples, timesteps,
    # output_dim), which TimeDistributed below requires.
    stack.extend(RNN(HIDDEN_SIZE, return_sequences=True) for _ in range(LAYERS))

    # Per-time-step classifier choosing one character of the vocabulary.
    stack.append(layers.TimeDistributed(layers.Dense(vocab_size, activation='softmax')))

    model = Sequential(stack)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model

In [30]:
print('Build model...')

# Instantiate and compile the network defined by get_model().
model = get_model()

# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 128)               72704     
_________________________________________________________________
repeat_vector_2 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_2 (TimeDist (None, 4, 13)             1677      
Total params: 205,965
Trainable params: 205,965
Non-trainable params: 0
_________________________________________________________________


### Training

In [31]:
# Train one epoch at a time so that a handful of validation predictions can be
# printed after every epoch to visualise progress.
for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    # Spot-check 10 randomly chosen validation samples.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Index with a one-element array to keep the leading batch dimension.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # `Sequential.predict_classes` was removed in TensorFlow 2.6. Taking the
        # argmax over the class axis of `predict` gives the same class indices
        # and works on both old and new Keras versions.
        preds = model.predict(rowx, verbose=0).argmax(axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        # preds already holds class indices, so skip the argmax inside decode.
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 0
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 165+131 T 296  [91m☒[0m 107 
Q 642-15  T 627  [91m☒[0m 12  
Q 668-90  T 578  [91m☒[0m 177 
Q 739+14  T 753  [91m☒[0m 107 
Q 769+559 T 1328 [91m☒[0m 1107
Q 667-136 T 531  [91m☒[0m 177 
Q 785-84  T 701  [91m☒[0m 177 
Q 243+74  T 317  [91m☒[0m 177 
Q 742+397 T 1139 [91m☒[0m 110 
Q 660+610 T 1270 [91m☒[0m 110 

--------------------------------------------------
Iteration 1
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 655-89  T 566  [91m☒[0m 466 
Q 420-41  T 379  [91m☒[0m 222 
Q 58-12   T 46   [91m☒[0m 22  
Q 938-30  T 908  [91m☒[0m 467 
Q 84+209  T 293  [91m☒[0m 108 
Q 338+19  T 357  [91m☒[0m 102 
Q 563-496 T 67   [91m☒[0m 222 
Q 968-37  T 931  [91m☒[0m 868 
Q 91+225  T 316  [91m☒[0m 102 
Q 860-438 T 422  [91m☒[0m 227 

--------------------------------------------------
Iteration 2
Train on 36000 samples, valida

Q 416-41  T 375  [91m☒[0m 376 
Q 7+769   T 776  [91m☒[0m 774 
Q 896+44  T 940  [91m☒[0m 931 
Q 352+91  T 443  [91m☒[0m 419 
Q 90+767  T 857  [91m☒[0m 767 
Q 366-311 T 55   [91m☒[0m 12  
Q 3+263   T 266  [91m☒[0m 272 
Q 856-608 T 248  [91m☒[0m 107 
Q 671+344 T 1015 [91m☒[0m 1004
Q 29+962  T 991  [92m☑[0m 991 

--------------------------------------------------
Iteration 15
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 8+682   T 690  [92m☑[0m 690 
Q 373+97  T 470  [91m☒[0m 464 
Q 64+769  T 833  [91m☒[0m 820 
Q 62+3    T 65   [91m☒[0m 63  
Q 780-25  T 755  [91m☒[0m 752 
Q 178+688 T 866  [92m☑[0m 866 
Q 658-29  T 629  [91m☒[0m 638 
Q 171-35  T 136  [91m☒[0m 146 
Q 955-15  T 940  [91m☒[0m 938 
Q 316+106 T 422  [91m☒[0m 453 

--------------------------------------------------
Iteration 16
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 587-6   T 581  [91m☒[0m 579 
Q 343+86  T 429  [91m☒[0m 428 
Q 442-92  T 350  [91m☒[0

Q 61+78   T 139  [91m☒[0m 149 
Q 130+806 T 936  [91m☒[0m 934 
Q 791+1   T 792  [91m☒[0m 791 
Q 487+443 T 930  [91m☒[0m 102 
Q 5+641   T 646  [91m☒[0m 647 
Q 600+95  T 695  [91m☒[0m 696 
Q 155-83  T 72   [91m☒[0m 73  
Q 79+297  T 376  [92m☑[0m 376 
Q 751-20  T 731  [92m☑[0m 731 
Q 732-88  T 644  [91m☒[0m 654 

--------------------------------------------------
Iteration 29
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 62-9    T 53   [91m☒[0m 52  
Q 287-89  T 198  [91m☒[0m 199 
Q 224+74  T 298  [92m☑[0m 298 
Q 69+323  T 392  [91m☒[0m 391 
Q 14+550  T 564  [92m☑[0m 564 
Q 347+773 T 1120 [91m☒[0m 1122
Q 738+129 T 867  [91m☒[0m 868 
Q 975+4   T 979  [91m☒[0m 980 
Q 289-189 T 100  [91m☒[0m 10  
Q 612+840 T 1452 [91m☒[0m 1462

--------------------------------------------------
Iteration 30
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 38+556  T 594  [92m☑[0m 594 
Q 994+369 T 1363 [91m☒[0m 1354
Q 94-70   T 24   [91m☒[0

Q 105-67  T 38   [91m☒[0m 48  
Q 313-5   T 308  [92m☑[0m 308 
Q 584-452 T 132  [92m☑[0m 132 
Q 60+525  T 585  [92m☑[0m 585 
Q 992+466 T 1458 [92m☑[0m 1458
Q 366-311 T 55   [92m☑[0m 55  
Q 616+59  T 675  [92m☑[0m 675 
Q 489-144 T 345  [92m☑[0m 345 
Q 154-99  T 55   [91m☒[0m 65  
Q 95+183  T 278  [91m☒[0m 279 

--------------------------------------------------
Iteration 43
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 519+145 T 664  [92m☑[0m 664 
Q 51-49   T 2    [91m☒[0m 3   
Q 174+243 T 417  [92m☑[0m 417 
Q 93+639  T 732  [92m☑[0m 732 
Q 967-96  T 871  [92m☑[0m 871 
Q 798-732 T 66   [91m☒[0m 56  
Q 86-43   T 43   [92m☑[0m 43  
Q 656+991 T 1647 [92m☑[0m 1647
Q 8+545   T 553  [92m☑[0m 553 
Q 321-12  T 309  [92m☑[0m 309 

--------------------------------------------------
Iteration 44
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 280+8   T 288  [92m☑[0m 288 
Q 9+350   T 359  [92m☑[0m 359 
Q 239-230 T 9    [91m☒[0

Q 302+4   T 306  [92m☑[0m 306 
Q 962-482 T 480  [91m☒[0m 470 
Q 49+292  T 341  [92m☑[0m 341 
Q 518+45  T 563  [92m☑[0m 563 
Q 324+956 T 1280 [92m☑[0m 1280
Q 549-171 T 378  [92m☑[0m 378 
Q 342-5   T 337  [92m☑[0m 337 
Q 933-843 T 90   [91m☒[0m 80  
Q 703+835 T 1538 [91m☒[0m 1548
Q 817-21  T 796  [91m☒[0m 896 

--------------------------------------------------
Iteration 57
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 433-177 T 256  [92m☑[0m 256 
Q 692-81  T 611  [92m☑[0m 611 
Q 360-23  T 337  [92m☑[0m 337 
Q 941+816 T 1757 [91m☒[0m 1756
Q 412+386 T 798  [92m☑[0m 798 
Q 866-59  T 807  [91m☒[0m 707 
Q 843+57  T 900  [92m☑[0m 900 
Q 515-453 T 62   [91m☒[0m 51  
Q 420+75  T 495  [92m☑[0m 495 
Q 157+9   T 166  [92m☑[0m 166 

--------------------------------------------------
Iteration 58
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 951-947 T 4    [91m☒[0m 3   
Q 135-72  T 63   [92m☑[0m 63  
Q 827-1   T 826  [92m☑[0

Q 944+410 T 1354 [92m☑[0m 1354
Q 941+345 T 1286 [92m☑[0m 1286
Q 9+213   T 222  [92m☑[0m 222 
Q 986-88  T 898  [92m☑[0m 898 
Q 379-23  T 356  [92m☑[0m 356 
Q 228+290 T 518  [91m☒[0m 508 
Q 61+263  T 324  [92m☑[0m 324 
Q 337-37  T 300  [92m☑[0m 300 
Q 985-522 T 463  [92m☑[0m 463 
Q 88+17   T 105  [92m☑[0m 105 

--------------------------------------------------
Iteration 71
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 133+43  T 176  [92m☑[0m 176 
Q 395+475 T 870  [91m☒[0m 860 
Q 442-200 T 242  [92m☑[0m 242 
Q 174+605 T 779  [92m☑[0m 779 
Q 31+461  T 492  [92m☑[0m 492 
Q 165+594 T 759  [91m☒[0m 769 
Q 738-533 T 205  [92m☑[0m 205 
Q 769+16  T 785  [92m☑[0m 785 
Q 8+229   T 237  [92m☑[0m 237 
Q 217+511 T 728  [92m☑[0m 728 

--------------------------------------------------
Iteration 72
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 37+931  T 968  [92m☑[0m 968 
Q 738-74  T 664  [92m☑[0m 664 
Q 981-40  T 941  [92m☑[0

Q 191+83  T 274  [92m☑[0m 274 
Q 722+729 T 1451 [92m☑[0m 1451
Q 852-74  T 778  [92m☑[0m 778 
Q 811-6   T 805  [92m☑[0m 805 
Q 620+158 T 778  [92m☑[0m 778 
Q 863+285 T 1148 [92m☑[0m 1148
Q 973-76  T 897  [92m☑[0m 897 
Q 51+608  T 659  [91m☒[0m 669 
Q 268-61  T 207  [92m☑[0m 207 
Q 198+83  T 281  [92m☑[0m 281 

--------------------------------------------------
Iteration 85
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 899+233 T 1132 [91m☒[0m 1131
Q 678+42  T 720  [91m☒[0m 710 
Q 89+322  T 411  [92m☑[0m 411 
Q 405+650 T 1055 [92m☑[0m 1055
Q 915+339 T 1254 [92m☑[0m 1254
Q 826+94  T 920  [91m☒[0m 910 
Q 495-461 T 34   [92m☑[0m 34  
Q 839+158 T 997  [91m☒[0m 9087
Q 19+910  T 929  [92m☑[0m 929 
Q 910-763 T 147  [92m☑[0m 147 

--------------------------------------------------
Iteration 86
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 635+709 T 1344 [92m☑[0m 1344
Q 877-23  T 854  [92m☑[0m 854 
Q 30+177  T 207  [91m☒[0

Q 864-44  T 820  [92m☑[0m 820 
Q 512+197 T 709  [92m☑[0m 709 
Q 209-22  T 187  [92m☑[0m 187 
Q 15+953  T 968  [92m☑[0m 968 
Q 81+33   T 114  [92m☑[0m 114 
Q 601-77  T 524  [92m☑[0m 524 
Q 652-80  T 572  [92m☑[0m 572 
Q 86-43   T 43   [92m☑[0m 43  
Q 385-78  T 307  [92m☑[0m 307 
Q 84+694  T 778  [92m☑[0m 778 

--------------------------------------------------
Iteration 99
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 685+61  T 746  [92m☑[0m 746 
Q 857-505 T 352  [92m☑[0m 352 
Q 138-2   T 136  [92m☑[0m 136 
Q 161+28  T 189  [92m☑[0m 189 
Q 0+232   T 232  [91m☒[0m 242 
Q 182+209 T 391  [92m☑[0m 391 
Q 48+99   T 147  [92m☑[0m 147 
Q 96+181  T 277  [92m☑[0m 277 
Q 690+22  T 712  [92m☑[0m 712 
Q 968+6   T 974  [92m☑[0m 974 


In [32]:
# Persist the trained weights to 'layer_1.h5'.
model.save_weights('layer_1.h5')
# NOTE: the commented-out reload below points at a different file name than the
# one saved above; adjust the path before re-enabling it.
# model.load_weights('my_model_weights.h5')

### Testing

In [34]:
# Score the model on the held-out test split; the result is unpacked as
# (loss, accuracy), matching the loss and single 'accuracy' metric set in compile.
# NOTE: `evaludated_accuracy` is misspelt, but the print cell below reads this
# exact name, so both must be renamed together.
evaluated_loss, evaludated_accuracy = model.evaluate(x=test_x, y=test_y)



In [35]:
# Report the test-set loss and accuracy returned by model.evaluate.
print("testing loss:{}, testing accuracy:{}".format(evaluated_loss, evaludated_accuracy))

testing loss:0.06599857666182021, testing accuracy:0.9775604166666667
