In [1]:
import numpy as np
from keras.models import Model, Sequential
from keras.layers import Input, LSTM, Dense, RNN
from keras import layers

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
class colors:
    """ANSI terminal escape sequences used to colorize the prediction marks printed during training."""
    # Prefix for the mark printed when the guess matches the target (ANSI SGR code 92).
    ok = '\033[92m'
    # Prefix for the mark printed when the guess differs from the target (ANSI SGR code 91).
    fail = '\033[91m'
    # Reset sequence (ANSI SGR code 0), appended after every colored mark.
    close = '\033[0m'

In [3]:
# Experiment configuration: every tunable value used by the cells below.

# Fixed seed so that data generation, shuffling and the random validation
# spot-checks repeat across "Restart & Run All". This seeds NumPy's global RNG
# only; randomness inside the Keras backend is not seeded here.
SEED = 42
np.random.seed(SEED)

TRAINING_SIZE = 80000         # number of unique questions generated per operator
DIGITS = 3                    # maximum number of decimal digits in each operand
REVERSE = False               # when True, question strings are reversed before encoding
MAXLEN = DIGITS + 1 + DIGITS  # longest question: operand + operator + operand
chars = '0123456789+- '       # alphabet; the trailing space is the padding character
# Recurrent layer class used to build the model.
# NOTE: this rebinds the name `RNN` that was imported from keras.layers.
RNN = layers.LSTM
HIDDEN_SIZE = 128             # units in every recurrent layer
BATCH_SIZE = 128              # mini-batch size passed to model.fit
LAYERS = 1                    # recurrent layers stacked after the RepeatVector

### one-hot encoding converter

In [4]:
class CharacterTable(object):
    """Bidirectional mapping between characters and one-hot rows.

    The alphabet is the sorted set of distinct characters passed to the
    constructor; a character's position in that ordering is its index.
    """

    def __init__(self, chars):
        """Build the lookup tables from an iterable of characters."""
        self.chars = sorted(set(chars))
        self.char_indices = {symbol: pos for pos, symbol in enumerate(self.chars)}
        self.indices_char = {pos: symbol for pos, symbol in enumerate(self.chars)}

    def encode(self, C, num_rows):
        """One-hot encode the string C.

        Returns a (num_rows, alphabet size) array of zeros in which row k has
        a 1 in the column of the k-th character of C. Rows beyond len(C)
        stay all-zero.
        """
        one_hot = np.zeros((num_rows, len(self.chars)))
        for row, symbol in enumerate(C):
            column = self.char_indices[symbol]
            one_hot[row, column] = 1
        return one_hot

    def decode(self, x, calc_argmax=True):
        """Turn encoded rows back into a string.

        With calc_argmax=True, x holds per-row scores and the argmax over the
        last axis selects each character; otherwise x is already a sequence
        of character indices.
        """
        positions = x.argmax(axis=-1) if calc_argmax else x
        return "".join(self.indices_char[pos] for pos in positions)

In [5]:
# Shared encoder/decoder built over the configured alphabet `chars`.
ctable = CharacterTable(chars)

### one-hot encoding map

In [6]:
# Display the index -> character mapping (the one-hot column order).
ctable.indices_char

{0: ' ',
 1: '+',
 2: '-',
 3: '0',
 4: '1',
 5: '2',
 6: '3',
 7: '4',
 8: '5',
 9: '6',
 10: '7',
 11: '8',
 12: '9'}

## Data Generation

- addition data: 80000
- subtraction data: 80000
- total data: 160000
- length of the sequence: 7

In [7]:
# Generate TRAINING_SIZE unique addition questions and their padded answers.
_ADD_DIGIT_CHARS = list('0123456789')


def _draw_addend():
    """Return a random integer parsed from 1 to DIGITS random decimal characters."""
    width = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(_ADD_DIGIT_CHARS) for _ in range(width)))


questions_with_plus = []
expected_with_plus = []
seen_with_plus = set()
print('Generating data...')
while len(questions_with_plus) < TRAINING_SIZE:
    left, right = _draw_addend(), _draw_addend()
    # x+y and y+x count as the same question: de-duplicate on the sorted pair.
    pair = tuple(sorted((left, right)))
    if pair in seen_with_plus:
        continue
    seen_with_plus.add(pair)
    # Right-pad the question to MAXLEN and the answer to DIGITS + 1 with spaces.
    question = '{}+{}'.format(left, right).ljust(MAXLEN)
    answer = str(left + right).ljust(DIGITS + 1)
    if REVERSE:
        question = question[::-1]
    questions_with_plus.append(question)
    expected_with_plus.append(answer)
print('Total addition questions:', len(questions_with_plus))

Generating data...
Total addition questions: 80000


### addition data sample

In [8]:
# Show the first five padded addition questions next to their padded answers.
print(questions_with_plus[:5], expected_with_plus[:5])

['3+6    ', '7+88   ', '2+679  ', '47+5   ', '38+617 '] ['9   ', '95  ', '681 ', '52  ', '655 ']


In [9]:
# Generate TRAINING_SIZE unique subtraction questions with non-negative answers.
_SUB_DIGIT_CHARS = list('0123456789')


def _draw_subtraction_operand():
    """Return a random integer parsed from 1 to DIGITS random decimal characters."""
    width = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(_SUB_DIGIT_CHARS) for _ in range(width)))


questions_with_minus = []
expected_with_minus = []
seen_with_minus = set()
print('Generating data...')
while len(questions_with_minus) < TRAINING_SIZE:
    minuend, subtrahend = _draw_subtraction_operand(), _draw_subtraction_operand()
    # Reject draws whose difference would be negative.
    if minuend < subtrahend:
        continue
    # minuend >= subtrahend here, so this is the ascending (sorted) pair.
    pair = (subtrahend, minuend)
    if pair in seen_with_minus:
        continue
    seen_with_minus.add(pair)
    # Right-pad the question to MAXLEN and the answer to DIGITS + 1 with spaces.
    question = '{}-{}'.format(minuend, subtrahend).ljust(MAXLEN)
    answer = str(minuend - subtrahend).ljust(DIGITS + 1)
    if REVERSE:
        question = question[::-1]
    questions_with_minus.append(question)
    expected_with_minus.append(answer)
print('Total subtraction questions:', len(questions_with_minus))

Generating data...
Total subtraction questions: 80000


### subtraction data sample

In [10]:
# Show the first five padded subtraction questions next to their padded answers.
print(questions_with_minus[:5], expected_with_minus[:5])

['11-3   ', '44-6   ', '74-41  ', '569-151', '83-8   '] ['8   ', '38  ', '33  ', '418 ', '75  ']


### combine addition & subtraction data

In [11]:
# Concatenate both operators into one dataset: addition samples first, then
# subtraction samples. Questions and answers stay aligned by position.
questions = questions_with_plus + questions_with_minus
expected = expected_with_plus + expected_with_minus

## Processing

### convert data to one-hot representation

In [12]:
print('Vectorization...')
# One-hot tensors laid out as (sample, time step, alphabet symbol).
# The builtin `bool` is used as the dtype: `np.bool` was only a deprecated alias
# for it (NumPy 1.20) and was removed in NumPy 1.24, where it raises AttributeError.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(expected), DIGITS + 1, len(chars)), dtype=bool)
# Each encoded matrix is cast to bool when assigned into its row.
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


### Split data into training, validation, testing

In [13]:
# Shuffle inputs and targets with one shared permutation so pairs stay aligned.
indices = np.arange(len(y))
np.random.shuffle(indices)
x = x[indices]
y = y[indices]

# The first 40000 shuffled samples form the training pool; the rest is the test set.
train_x, test_x = x[:40000], x[40000:]
train_y, test_y = y[:40000], y[40000:]

# Hold out the last tenth of the training pool for validation.
split_at = len(train_x) - len(train_x) // 10
x_train, x_val = train_x[:split_at], train_x[split_at:]
y_train, y_val = train_y[:split_at], train_y[split_at:]

# Report the shape of every split.
for title, inputs, targets in (
    ('Training Data:', x_train, y_train),
    ('Validation Data:', x_val, y_val),
    ('Testing Data:', test_x, test_y),
):
    print(title)
    print(inputs.shape)
    print(targets.shape)

Training Data:
(36000, 7, 13)
(36000, 4, 13)
Validation Data:
(4000, 7, 13)
(4000, 4, 13)
Testing Data:
(120000, 7, 13)
(120000, 4, 13)


In [14]:
# Print the raw one-hot arrays of the first three training samples and their labels.
print("input: ", x_train[:3], '\n\n', "label: ", y_train[:3])

input:  [[[False False False False False  True False False False False False
   False False]
  [False False False False False False False False False  True False
   False False]
  [False False False False False False  True False False False False
   False False]
  [False False  True False False False False False False False False
   False False]
  [False False False False False  True False False False False False
   False False]
  [False False False  True False False False False False False False
   False False]
  [ True False False False False False False False False False False
   False False]]

 [[False False False False False False  True False False False False
   False False]
  [False False False False False False False  True False False False
   False False]
  [False False False False False False  True False False False False
   False False]
  [False  True False False False False False False False False False
   False False]
  [False False False False False False False False Fals

## Build Model

In [15]:
def get_model():
    """Build and compile the sequence-to-sequence network.

    Architecture, in order: one recurrent layer that reads the
    (MAXLEN, len(chars)) input and emits a single vector, a RepeatVector that
    repeats that vector DIGITS + 1 times, LAYERS recurrent layers returning
    full sequences, and a softmax Dense layer applied at every time step.

    Returns:
        The Sequential model, compiled with categorical cross-entropy loss,
        the Adam optimizer and the accuracy metric.
    """
    n_symbols = len(chars)

    net = Sequential()
    net.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, n_symbols)))
    net.add(layers.RepeatVector(DIGITS + 1))
    for _layer in range(LAYERS):
        net.add(RNN(HIDDEN_SIZE, return_sequences=True))

    # One distribution over the alphabet per output position.
    per_step_classifier = layers.Dense(n_symbols, activation='softmax')
    net.add(layers.TimeDistributed(per_step_classifier))

    net.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return net

In [16]:
print('Build model...')
# Instantiate the network and print its layer-by-layer summary.
model = get_model()
model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               72704     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 13)             1677      
Total params: 205,965
Trainable params: 205,965
Non-trainable params: 0
_________________________________________________________________


### Training

In [None]:
# Train for 100 single-epoch iterations. After each epoch, spot-check ten random
# validation samples and print question (Q), target (T), a colored mark and the guess.
for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    for _ in range(10):
        ind = np.random.randint(0, len(x_val))
        # Index with a length-1 array to keep the leading batch dimension.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # Sequential.predict_classes() is deprecated and no longer exists in
        # recent Keras/TensorFlow releases. Taking the argmax over the
        # predicted per-step distributions yields the same class indices.
        preds = model.predict(rowx, verbose=0).argmax(axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        # preds already holds class indices, so skip the argmax inside decode.
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 0
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 597-390 T 207  [91m☒[0m 103 
Q 291-15  T 276  [91m☒[0m 10  
Q 833+73  T 906  [91m☒[0m 103 
Q 871+950 T 1821 [91m☒[0m 1103
Q 64-52   T 12   [91m☒[0m 20  
Q 635-109 T 526  [91m☒[0m 103 
Q 386-19  T 367  [91m☒[0m 103 
Q 191-54  T 137  [91m☒[0m 10  
Q 902-41  T 861  [91m☒[0m 103 
Q 983+92  T 1075 [91m☒[0m 100 

--------------------------------------------------
Iteration 1
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 16+488  T 504  [91m☒[0m 105 
Q 381-31  T 350  [91m☒[0m 245 
Q 154-59  T 95   [91m☒[0m 245 
Q 97-23   T 74   [91m☒[0m 64  
Q 788+11  T 799  [91m☒[0m 105 
Q 668-501 T 167  [91m☒[0m 500 
Q 998-885 T 113  [91m☒[0m 105 
Q 108+782 T 890  [91m☒[0m 104 
Q 38+525  T 563  [91m☒[0m 105 
Q 75+174  T 249  [91m☒[0m 105 

--------------------------------------------------
Iteration 2
Train on 36000 samples, valida

Q 775-2   T 773  [91m☒[0m 766 
Q 790+13  T 803  [91m☒[0m 799 
Q 409-66  T 343  [92m☑[0m 343 
Q 58+54   T 112  [91m☒[0m 110 
Q 693+720 T 1413 [91m☒[0m 1300
Q 389+607 T 996  [91m☒[0m 966 
Q 53+845  T 898  [91m☒[0m 894 
Q 651-39  T 612  [91m☒[0m 617 
Q 775+723 T 1498 [91m☒[0m 1406
Q 580-163 T 417  [91m☒[0m 491 

--------------------------------------------------
Iteration 15
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 45+253  T 298  [91m☒[0m 289 
Q 716-75  T 641  [92m☑[0m 641 
Q 185-24  T 161  [91m☒[0m 155 
Q 267+328 T 595  [91m☒[0m 605 
Q 103+574 T 677  [91m☒[0m 788 
Q 216-14  T 202  [91m☒[0m 295 
Q 976-545 T 431  [91m☒[0m 434 
Q 785-540 T 245  [91m☒[0m 234 
Q 299+48  T 347  [91m☒[0m 344 
Q 882-88  T 794  [91m☒[0m 796 

--------------------------------------------------
Iteration 16
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 709-40  T 669  [91m☒[0m 651 
Q 941+169 T 1110 [91m☒[0m 1113
Q 895-7   T 888  [91m☒[0

Q 868-70  T 798  [91m☒[0m 788 
Q 5+446   T 451  [91m☒[0m 441 
Q 872-51  T 821  [91m☒[0m 822 
Q 823+4   T 827  [92m☑[0m 827 
Q 981-205 T 776  [92m☑[0m 776 
Q 785+608 T 1393 [91m☒[0m 1392
Q 213-75  T 138  [91m☒[0m 137 
Q 149+543 T 692  [91m☒[0m 702 
Q 47+133  T 180  [92m☑[0m 180 
Q 794-11  T 783  [91m☒[0m 782 

--------------------------------------------------
Iteration 29
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 449-163 T 286  [91m☒[0m 274 
Q 654+61  T 715  [92m☑[0m 715 
Q 749+99  T 848  [91m☒[0m 857 
Q 45+535  T 580  [91m☒[0m 570 
Q 897+8   T 905  [91m☒[0m 994 
Q 169+63  T 232  [92m☑[0m 232 
Q 228+966 T 1194 [91m☒[0m 1294
Q 628-5   T 623  [92m☑[0m 623 
Q 199-91  T 108  [92m☑[0m 108 
Q 29+615  T 644  [92m☑[0m 644 

--------------------------------------------------
Iteration 30
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 729-20  T 709  [91m☒[0m 708 
Q 88+778  T 866  [92m☑[0m 866 
Q 116+202 T 318  [91m☒[0

In [None]:
# Persist the trained weights to disk.
model.save_weights('layer_1.h5')
# NOTE(review): the commented-out load below names a different file than the one
# saved above; adjust the path before enabling it.
# model.load_weights('my_model_weights.h5')

### Testing

In [None]:
# Evaluate on the held-out test split and unpack the result into loss and accuracy
# (the model is compiled with metrics=['accuracy']).
evaluated_loss, evaludated_accuracy = model.evaluate(x=test_x, y=test_y)

In [None]:
# Report the test-set loss and accuracy computed by the evaluation cell.
print("testing loss:{}, testing accuracy:{}".format(evaluated_loss, evaludated_accuracy))

# Report

## Q1: Analyze the results under different numbers of digits, training epochs, training sizes ...