In [1]:
from keras import layers
from keras.models import Sequential
from keras.models import load_model
import numpy as np
import copy
import os
import time
from six.moves import range

Using TensorFlow backend.


# Parameters Config

In [2]:
class colors:
    """ANSI terminal escape sequences used to colour the evaluation printout."""
    ok = '\033[92m'     # bright green, wraps the mark shown for a correct guess
    fail = '\033[91m'   # bright red, wraps the mark shown for a wrong guess
    close = '\033[0m'   # resets the terminal attributes after a coloured mark

In [3]:
# Total number of unique addition questions to generate.
DATA_SIZE = 80
# Maximum number of digits in each operand.
DIGITS = 4
# When True, each padded query string is reversed before being encoded.
REVERSE = False
# Longest possible query: DIGITS digits, the '+' sign, DIGITS digits.
MAXLEN = DIGITS + 1 + DIGITS
# Vocabulary: the ten digits, '+', and the space used for padding.
chars = '0123456789+ '
# Recurrent layer class used when building the model.
RNN = layers.LSTM
# Number of units in each recurrent layer.
HIDDEN_SIZE = 128
# Mini-batch size passed to model.fit.
BATCH_SIZE = 128
# Number of recurrent layers stacked after the RepeatVector layer.
LAYERS = 1
# Intended number of training iterations.
ITERATIONS=10
# 80% of the generated samples are used for training (and validation) ...
TRAINING_SIZE = int(DATA_SIZE * 0.8)
# ... and the remainder is held out for testing.
TESTING_SIZE = DATA_SIZE - TRAINING_SIZE
# Per-epoch metric values (as strings) collected by the training loop.
HISTORY = []

In [4]:
class CharacterTable(object):
    """Two-way lookup between characters and one-hot encoded rows.

    The vocabulary is the sorted set of distinct characters passed to the
    constructor; a character's index is its position in that sorted list.
    """

    def __init__(self, chars):
        """Build the character <-> index tables from the iterable ``chars``."""
        vocabulary = sorted(set(chars))
        self.chars = vocabulary
        self.char_indices = {ch: idx for idx, ch in enumerate(vocabulary)}
        self.indices_char = {idx: ch for idx, ch in enumerate(vocabulary)}

    def encode(self, C, num_rows):
        """One-hot encode the string ``C``.

        Returns a float array of shape ``(num_rows, len(self.chars))`` where
        row ``k`` has a 1 in the column of the ``k``-th character of ``C``.
        Rows beyond ``len(C)`` stay all-zero.
        """
        onehot = np.zeros((num_rows, len(self.chars)))
        for row, ch in enumerate(C):
            column = self.char_indices[ch]
            onehot[row, column] = 1
        return onehot

    def decode(self, x, calc_argmax=True):
        """Turn encoded rows back into a string.

        With ``calc_argmax=True`` (the default) ``x`` is reduced with an
        argmax over its last axis first; otherwise ``x`` is iterated as a
        sequence of vocabulary indices.
        """
        indices = x.argmax(axis=-1) if calc_argmax else x
        return "".join([self.indices_char[k] for k in indices])

In [5]:
# Single shared table used to encode queries/answers and decode predictions.
ctable = CharacterTable(chars)

In [6]:
# Show the index -> character mapping built from the sorted vocabulary.
ctable.indices_char

{0: ' ',
 1: '+',
 2: '0',
 3: '1',
 4: '2',
 5: '3',
 6: '4',
 7: '5',
 8: '6',
 9: '7',
 10: '8',
 11: '9'}

# Data Generation

In [7]:
questions = []
expected = []
seen = set()


def random_operand():
    """Return a random non-negative integer built from 1 to DIGITS random digits.

    Leading zeros collapse when the digit string is converted to int, so the
    result may have fewer digits than were drawn.
    """
    # Draw the length first, then the digits (same RNG call order as before).
    n_digits = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(list('0123456789')) for _ in range(n_digits)))


print('Generating data...')
while len(questions) < DATA_SIZE:
    a, b = random_operand(), random_operand()
    # Treat a+b and b+a as the same question so each unordered pair appears once.
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    # Right-pad the query to MAXLEN and the answer to DIGITS + 1 with spaces.
    q = '{}+{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    ans = str(a + b)
    ans += ' ' * (DIGITS + 1 - len(ans))
    if REVERSE:
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total addition questions:', len(questions))

Generating data...
Total addition questions: 80


In [8]:
# Peek at the first five padded queries and their padded answers.
print(questions[:5], expected[:5])

['6077+7   ', '532+9452 ', '3+8      ', '780+6    ', '8921+6   '] ['6084 ', '9984 ', '11   ', '786  ', '8927 ']


# Processing

In [9]:
print('Vectorization...')
# One-hot tensors shaped (samples, time steps, vocabulary size).
# Use the builtin `bool`: the `np.bool` alias was deprecated in NumPy 1.20 and
# removed in 1.24, where it raises AttributeError.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(expected), DIGITS + 1, len(chars)), dtype=bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


In [10]:
# Shuffle inputs and labels with one shared permutation so pairs stay aligned.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

# train_test_split
train_x, test_x = x[:TRAINING_SIZE], x[TRAINING_SIZE:]
train_y, test_y = y[:TRAINING_SIZE], y[TRAINING_SIZE:]

# Hold out the last tenth of the training portion for validation.
split_at = len(train_x) - len(train_x) // 10
x_train, x_val = train_x[:split_at], train_x[split_at:]
y_train, y_val = train_y[:split_at], train_y[split_at:]

# Report the shapes of every split: inputs first, then labels.
for title, features, labels in (
        ('Training Data:', x_train, y_train),
        ('Validation Data:', x_val, y_val),
        ('Testing Data:', test_x, test_y)):
    print(title)
    print(features.shape)
    print(labels.shape)

Training Data:
(58, 9, 12)
(58, 5, 12)
Validation Data:
(6, 9, 12)
(6, 5, 12)
Testing Data:
(16, 9, 12)
(16, 5, 12)


In [11]:
# Dump the first three one-hot encoded training inputs and their labels.
print("input: ", x_train[:3], '\n\n', "label: ", y_train[:3])

input:  [[[False False False False False False False False False  True False False]
  [False False False False False False False False False False  True False]
  [False False  True False False False False False False False False False]
  [False  True False False False False False False False False False False]
  [False False False False False False False False  True False False False]
  [ True False False False False False False False False False False False]
  [ True False False False False False False False False False False False]
  [ True False False False False False False False False False False False]
  [ True False False False False False False False False False False False]]

 [[False False False  True False False False False False False False False]
  [False False False False False False  True False False False False False]
  [False False False False False False False False  True False False False]
  [False False False False False False False False False False False  True]
  

# Build Model

In [12]:
print('Build model...')
model = Sequential()
# Encoder: read the one-hot query (MAXLEN steps) and emit a single
# HIDDEN_SIZE-dimensional vector.
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
# Feed that vector to the decoder once per output character (DIGITS + 1 steps).
model.add(layers.RepeatVector(DIGITS + 1))
# Decoder: LAYERS stacked recurrent layers, each returning its full output
# sequence so every time step can be classified.
for _ in range(LAYERS):
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))

# Apply the same dense layer at every time step, then softmax over the
# vocabulary to get a per-character distribution.
model.add(layers.TimeDistributed(layers.Dense(len(chars))))
model.add(layers.Activation('softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               72192     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 5, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 5, 128)            131584    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 5, 12)             1548      
_________________________________________________________________
activation_1 (Activation)    (None, 5, 12)             0         
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


# Training

In [14]:
val_acc = 0
iteration = 1
start = time.time()
# Train one epoch at a time until validation accuracy reaches 90%, but never
# for more than ITERATIONS epochs: the target may never be reached, and an
# uncapped loop would then run forever.
while val_acc < 0.9 and iteration <= ITERATIONS:
    print()
    print('-' * 50)
    print('Iteration', iteration)
    results = model.fit(x_train, y_train,
                        batch_size=BATCH_SIZE,
                        epochs=1,
                        validation_data=(x_val, y_val))

    metrics = results.history
    # Record this epoch's metrics in sorted-key order (acc, loss, val_acc, val_loss).
    HISTORY.append([str(metrics[name][0]) for name in sorted(metrics)])
    # Keras < 2.3 reports "val_acc"; newer releases report "val_accuracy".
    val_key = "val_acc" if "val_acc" in metrics else "val_accuracy"
    val_acc = metrics[val_key][0]
    iteration += 1
# Elapsed wall-clock training time in seconds.
end = time.time() - start


--------------------------------------------------
Iteration 1
Train on 58 samples, validate on 6 samples
Epoch 1/1


In [15]:
# Create the output directories up front; on a fresh checkout they do not
# exist and both model.save and open(..., 'w') would fail.
os.makedirs("model", exist_ok=True)
os.makedirs("log", exist_ok=True)

# Persist the trained model as model/<DIGITS>d_adder.h5.
model_name = "{:d}d_adder.h5".format(DIGITS)
model.save(os.path.join("model", model_name))

# Write the per-epoch metrics, followed by the total training time, to
# log/<DIGITS>d_adder.csv.
log_name = "{:d}d_adder.csv".format(DIGITS)
with open(os.path.join("log", log_name), 'w') as wf:
    wf.write('acc,loss,val_acc,val_loss\n')
    for line in HISTORY:
        wf.write(",".join(line)+"\n")
    wf.write("Time,{}\n".format(str(end)))

# Evaluation on the Test Set

In [16]:
right = 0
# `Sequential.predict_classes` was removed in TensorFlow 2.6. Taking the argmax
# over the last (vocabulary) axis of `predict` gives the same class indices
# and works on old and new Keras alike.
preds = model.predict(test_x, verbose=0).argmax(axis=-1)
for i in range(len(preds)):
    q = ctable.decode(test_x[i])
    correct = ctable.decode(test_y[i])
    # preds already holds class indices, so skip the argmax inside decode.
    guess = ctable.decode(preds[i], calc_argmax=False)
    # Print the query in its original orientation even if it was reversed for training.
    print('Q', q[::-1] if REVERSE else q, end=' ')
    print('T', correct, end=' ')
    # A guess counts only when the whole padded answer string matches.
    if correct == guess:
        print(colors.ok + '☑' + colors.close, end=' ')
        right += 1
    else:
        print(colors.fail + '☒' + colors.close, end=' ')
    print(guess)
print("MSG : Accuracy is {}".format(right / len(preds)))

Q 3754+6838 T 10592 [91m☒[0m 3    
Q 8127+2    T 8129  [91m☒[0m      
Q 156+239   T 395   [91m☒[0m      
Q 2252+236  T 2488  [91m☒[0m      
Q 2+76      T 78    [91m☒[0m      
Q 15+7666   T 7681  [91m☒[0m      
Q 3+8       T 11    [91m☒[0m      
Q 944+3261  T 4205  [91m☒[0m      
Q 86+4045   T 4131  [91m☒[0m      
Q 2+4       T 6     [91m☒[0m      
Q 2+779     T 781   [91m☒[0m      
Q 9137+6    T 9143  [91m☒[0m      
Q 410+74    T 484   [91m☒[0m      
Q 84+720    T 804   [91m☒[0m      
Q 518+29    T 547   [91m☒[0m      
Q 9983+7736 T 17719 [91m☒[0m 3    
MSG : Accuracy is 0.0
