In [1]:
from keras.models import Sequential
from keras import layers
import numpy as np
from six.moves import range
import os
# Expose only CUDA device 0 to this process (standard CUDA_VISIBLE_DEVICES semantics).
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

Using TensorFlow backend.


# Parameters Config

In [2]:
class colors:
    """ANSI escape sequences wrapped around the check/cross marks printed for each guess."""
    # Bright-green foreground; printed before the mark of a correct guess.
    ok = '\033[92m'
    # Bright-red foreground; printed before the mark of a wrong guess.
    fail = '\033[91m'
    # Reset sequence; restores the terminal's default text attributes.
    close = '\033[0m'

In [3]:
# Maximum number of decimal digits per operand.
DIGITS = 3

# Dataset sizing by operand width:
#   TRAINING_SIZE - total number of unique questions to generate
#   Train_num     - how many of them go to the train+validation portion
# NOTE(review): with DIGITS > 3 and Mul_data enabled, Train_num (40000 * 3)
# exceeds TRAINING_SIZE (80000), which leaves an empty test split - confirm
# the intended sizes before using that configuration.
if DIGITS == 3:
    TRAINING_SIZE, Train_num = 400000, 100000
elif DIGITS < 3:
    TRAINING_SIZE, Train_num = 8000, 2000
else:
    TRAINING_SIZE, Train_num = 80000, 40000

# When True, question strings are fed to the network reversed.
REVERSE = False
# Longest possible question: "<DIGITS digits><operator><DIGITS digits>".
MAXLEN = 2 * DIGITS + 1

# Task switches (which arithmetic operation the generated data contains).
Add_data = False
Sub_data = False
Mul_data = True

# Multiplication gets a three times larger train+validation portion.
if Mul_data:
    Train_num = Train_num * 3

# Alphabet of the one-hot encoding: digits, the operator(s) in use, and the
# space used for padding.
if Add_data:
    chars = '0123456789+- ' if Sub_data else '0123456789+ '
elif Mul_data:
    chars = '0123456789* '
else:
    chars = '0123456789- '

# Network hyper-parameters.
RNN = layers.LSTM
HIDDEN_SIZE = 128
BATCH_SIZE = 128
LAYERS = 1

In [4]:
class CharacterTable(object):
    """Two-way mapping between characters and one-hot rows.

    The alphabet is the sorted list of distinct characters in ``chars``; a
    character's position in that list is its one-hot column index.
    """

    def __init__(self, chars):
        """Build the character <-> index lookup tables from ``chars``."""
        self.chars = sorted(set(chars))
        self.char_indices = {ch: pos for pos, ch in enumerate(self.chars)}
        self.indices_char = dict(enumerate(self.chars))

    def encode(self, C, num_rows):
        """One-hot encode the string ``C``.

        Returns a ``(num_rows, len(self.chars))`` array of zeros with a single
        1 per character of ``C``; rows beyond ``len(C)`` stay all-zero.
        """
        one_hot = np.zeros((num_rows, len(self.chars)))
        for row, ch in enumerate(C):
            col = self.char_indices[ch]
            one_hot[row, col] = 1
        return one_hot

    def decode(self, x, calc_argmax=True):
        """Turn one-hot rows (or, with ``calc_argmax=False``, indices) into a string."""
        indices = x.argmax(axis=-1) if calc_argmax else x
        return "".join(map(self.indices_char.__getitem__, indices))

In [5]:
# Encoder/decoder built over the alphabet held in `chars`.
ctable = CharacterTable(chars)

In [6]:
# Display the index -> character table to check the one-hot column order.
ctable.indices_char

{0: ' ',
 1: '*',
 2: '0',
 3: '1',
 4: '2',
 5: '3',
 6: '4',
 7: '5',
 8: '6',
 9: '7',
 10: '8',
 11: '9'}

# Data Generation

In [7]:
questions = []
expected = []
seen = set()
print('Generating data...')


def _random_operand():
    """Draw a random non-negative integer written with 1..DIGITS decimal digits.

    Digits are sampled independently, so leading zeros can occur and every
    value in [0, 10**DIGITS - 1] is reachable.
    """
    n_digits = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(list('0123456789')) for _ in range(n_digits)))


# Operator selection, in the same precedence as the task switches are
# interpreted elsewhere: add+sub mix, add only, mul only, otherwise sub only.
if Add_data and Sub_data:
    operators = '+-'
elif Add_data:
    operators = '+'
elif Mul_data:
    operators = '*'
else:
    operators = '-'

# Products are padded to 2 * DIGITS characters, sums/differences to DIGITS + 1.
answer_len = 2 * DIGITS if operators == '*' else DIGITS + 1

# Questions are de-duplicated on the unordered operand pair (per operator), so
# only a finite number of distinct questions exists.  Asking for more than
# that would make the rejection loop below spin forever, so fail fast instead.
n_values = 10 ** DIGITS
max_unique = len(operators) * n_values * (n_values + 1) // 2
if TRAINING_SIZE > max_unique:
    raise ValueError(
        'TRAINING_SIZE={} exceeds the {} distinct questions available for '
        'DIGITS={}; data generation would never finish.'.format(
            TRAINING_SIZE, max_unique, DIGITS))

while len(questions) < TRAINING_SIZE:
    a, b = _random_operand(), _random_operand()
    # In the mixed mode pick addition or subtraction with equal probability.
    op = operators[np.random.randint(2)] if len(operators) == 2 else operators

    # Always put the larger operand first (keeps differences non-negative).
    big, small = max(a, b), min(a, b)
    # Third key element separates '+' (0) from '-' (-1) in the mixed mode.
    tag = -1 if (len(operators) == 2 and op == '-') else 0
    key = (big, small, tag)
    if key in seen:
        continue
    seen.add(key)

    if op == '+':
        value = big + small
    elif op == '-':
        value = big - small
    else:
        value = big * small

    q = '{}{}{}'.format(big, op, small)
    ans = str(value)
    ans += ' ' * (answer_len - len(ans))

    # Right-pad the question to the fixed input length.
    query = q + ' ' * (MAXLEN - len(q))
    if REVERSE:
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total addition questions:', len(questions))

Generating data...
Total addition questions: 400000


In [8]:
# Preview the first five padded question strings and their padded answers.
print(questions[:5], expected[:5])

['90*3   ', '989*9  ', '980*4  ', '210*0  ', '92*20  '] ['270   ', '8901  ', '3920  ', '0     ', '1840  ']


# Processing

In [9]:
print('Vectorization...')
# Answer strings are 2 * DIGITS long for multiplication, DIGITS + 1 otherwise.
answer_len = 2 * DIGITS if Mul_data else DIGITS + 1

# Use the builtin `bool` dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(expected), answer_len, len(chars)), dtype=bool)

# One-hot encode every question / answer string into its slot.
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, answer_len)

Vectorization...


In [10]:
# Shuffle inputs and labels with one shared random permutation.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

# The first Train_num samples form the train+validation pool, the rest is test.
train_x, test_x = x[:Train_num], x[Train_num:]
train_y, test_y = y[:Train_num], y[Train_num:]

# Hold out the last tenth of the pool for validation.
split_at = len(train_x) - len(train_x) // 10
x_train, x_val = train_x[:split_at], train_x[split_at:]
y_train, y_val = train_y[:split_at], train_y[split_at:]

# Report the shape of every split.
for title, inputs, labels in (
        ('Training Data:', x_train, y_train),
        ('Validation Data:', x_val, y_val),
        ('Testing Data:', test_x, test_y)):
    print(title)
    print(inputs.shape)
    print(labels.shape)

Training Data:
(270000, 7, 12)
(270000, 6, 12)
Validation Data:
(30000, 7, 12)
(30000, 6, 12)
Testing Data:
(100000, 7, 12)
(100000, 6, 12)


In [11]:
# Dump the first three one-hot encoded training inputs and labels for inspection.
print("input: ", x_train[:3], '\n\n', "label: ", y_train[:3])

input:  [[[False False False False False False False False False  True False
   False]
  [False False False  True False False False False False False False
   False]
  [False False False False False False False False  True False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False False False  True False False False False
   False]
  [False False False False  True False False False False False False
   False]
  [False False False False False False False False False False False
    True]]

 [[False False False False False False False False False False False
    True]
  [False False False  True False False False False False False False
   False]
  [False False False False False  True False False False False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False False  True False False False False False
   False]
  [False False False False False False Fal

# Build Model

In [12]:
print('Build model...')

# Sequence-to-sequence network: a bidirectional recurrent encoder summarises
# the question, its summary is repeated once per answer position, a recurrent
# decoder expands it, and a per-position softmax picks one character.
from keras.layers import Dense, Bidirectional, Lambda, RepeatVector, TimeDistributed

model = Sequential([
    # Encoder over the (timesteps, alphabet size) one-hot question.
    Bidirectional(RNN(units=HIDDEN_SIZE * 2), input_shape=x_train.shape[1:]),
    # Feed the encoder summary to every answer timestep.
    RepeatVector(y_train.shape[1]),
    # Decoder emitting one hidden state per answer character.
    RNN(units=HIDDEN_SIZE * 4, return_sequences=True),
    # Character distribution for each answer position.
    TimeDistributed(Dense(units=y_train.shape[2], activation='softmax')),
    # Flip the time axis of the output sequence.
    Lambda(lambda seq: seq[:, ::-1, :]),
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_1 (Bidirection (None, 512)               550912    
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 6, 512)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 6, 512)            2099200   
_________________________________________________________________
time_distributed_1 (TimeDist (None, 6, 12)             6156      
_________________________________________________________________
lambda_1 (Lambda)            (None, 6, 12)             0         
Total params: 2,656,268
Trainable params: 2,656,268
Non-trainable params: 0
_________________________________________________________________


# Training

In [13]:
# 1-based epoch counts after which the whole test set is scored.
it = [10,20,50,75,100]
for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    # Train for exactly one epoch per outer iteration.
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))

    # Spot-check ten random validation samples and print truth vs. guess.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Slicing keeps the leading batch axis of size 1.
        rowx, rowy = x_val[ind:ind + 1], y_val[ind:ind + 1]
        # `Sequential.predict_classes` no longer exists in current
        # TensorFlow/Keras; the arg-max over the softmax output is the same
        # class prediction and works on every version.
        preds = model.predict(rowx, verbose=0).argmax(axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)

    if (iteration+1) in it:
        # Exact-match accuracy on the test set.  Comparing character indices
        # is equivalent to comparing the decoded strings (the index -> char
        # mapping is one-to-one) and avoids decoding every sample in Python.
        pre_y = np.argmax(model.predict(test_x), axis=2)
        true_y = np.argmax(test_y, axis=2)
        acc = float(np.sum(np.all(pre_y == true_y, axis=1)))
        print('accuracy = ',int(acc), "/", test_x.shape[0], "   ", (acc*100.0/test_x.shape[0]) )


--------------------------------------------------
Iteration 0
Train on 270000 samples, validate on 30000 samples
Epoch 1/1
Q 676*393 T 265668 [91m☒[0m 251288
Q 305*12  T 3660   [91m☒[0m 3500  
Q 825*76  T 62700  [91m☒[0m 64450 
Q 899*412 T 370388 [91m☒[0m 374168
Q 903*774 T 698922 [91m☒[0m 711122
Q 460*16  T 7360   [91m☒[0m 7140  
Q 760*62  T 47120  [91m☒[0m 40120 
Q 657*514 T 337698 [91m☒[0m 334568
Q 861*409 T 352149 [91m☒[0m 363549
Q 785*293 T 230005 [91m☒[0m 234105

--------------------------------------------------
Iteration 1
Train on 270000 samples, validate on 30000 samples
Epoch 1/1
Q 605*547 T 330935 [91m☒[0m 334785
Q 783*218 T 170694 [91m☒[0m 177734
Q 404*344 T 138976 [91m☒[0m 147476
Q 939*441 T 414099 [91m☒[0m 405079
Q 915*760 T 695400 [91m☒[0m 697400
Q 254*55  T 13970  [91m☒[0m 14770 
Q 461*276 T 127236 [91m☒[0m 127796
Q 819*64  T 52416  [91m☒[0m 52136 
Q 801*246 T 197046 [91m☒[0m 195786
Q 588*76  T 44688  [91m☒[0m 44448 

---------

Q 522*201 T 104922 [91m☒[0m 104722
Q 905*865 T 782825 [92m☑[0m 782825
Q 770*639 T 492030 [91m☒[0m 484030
Q 799*177 T 141423 [91m☒[0m 140023
Q 494*395 T 195130 [91m☒[0m 195730
Q 488*371 T 181048 [91m☒[0m 181248
Q 771*340 T 262140 [91m☒[0m 264140
Q 736*715 T 526240 [91m☒[0m 526640
Q 195*87  T 16965  [91m☒[0m 16765 
Q 365*45  T 16425  [92m☑[0m 16425 

--------------------------------------------------
Iteration 14
Train on 270000 samples, validate on 30000 samples
Epoch 1/1
Q 841*398 T 334718 [91m☒[0m 334118
Q 760*412 T 313120 [92m☑[0m 313120
Q 948*577 T 546996 [91m☒[0m 548596
Q 992*139 T 137888 [91m☒[0m 137488
Q 788*760 T 598880 [91m☒[0m 596880
Q 80*26   T 2080   [92m☑[0m 2080  
Q 876*621 T 543996 [91m☒[0m 544196
Q 569*488 T 277672 [91m☒[0m 278472
Q 891*583 T 519453 [91m☒[0m 520153
Q 294*63  T 18522  [92m☑[0m 18522 

--------------------------------------------------
Iteration 15
Train on 270000 samples, validate on 30000 samples
Epoch 1/1
Q 855*67

Q 949*281 T 266669 [91m☒[0m 266269
Q 505*93  T 46965  [91m☒[0m 46765 
Q 781*595 T 464695 [92m☑[0m 464695
Q 626*234 T 146484 [91m☒[0m 146284
Q 655*239 T 156545 [91m☒[0m 156345
Q 361*75  T 27075  [91m☒[0m 27575 
Q 554*333 T 184482 [91m☒[0m 184082
Q 969*829 T 803301 [91m☒[0m 802501
Q 732*28  T 20496  [92m☑[0m 20496 
Q 526*108 T 56808  [91m☒[0m 57408 

--------------------------------------------------
Iteration 40
Train on 270000 samples, validate on 30000 samples
Epoch 1/1
Q 319*14  T 4466   [92m☑[0m 4466  
Q 838*649 T 543862 [91m☒[0m 543662
Q 722*66  T 47652  [92m☑[0m 47652 
Q 651*450 T 292950 [92m☑[0m 292950
Q 939*487 T 457293 [91m☒[0m 457493
Q 169*61  T 10309  [91m☒[0m 10409 
Q 781*94  T 73414  [91m☒[0m 73814 
Q 854*782 T 667828 [91m☒[0m 678028
Q 952*179 T 170408 [91m☒[0m 170208
Q 498*9   T 4482   [92m☑[0m 4482  

--------------------------------------------------
Iteration 41
Train on 270000 samples, validate on 30000 samples
Epoch 1/1
Q 300*21

Q 862*144 T 124128 [91m☒[0m 123728
Q 809*264 T 213576 [91m☒[0m 213776
Q 934*573 T 535182 [91m☒[0m 535382
Q 624*170 T 106080 [92m☑[0m 106080
Q 462*78  T 36036  [91m☒[0m 36636 
Q 487*192 T 93504  [91m☒[0m 93304 
Q 785*541 T 424685 [91m☒[0m 424885
Q 803*361 T 289883 [91m☒[0m 289483
Q 555*106 T 58830  [92m☑[0m 58830 
Q 481*210 T 101010 [92m☑[0m 101010

--------------------------------------------------
Iteration 66
Train on 270000 samples, validate on 30000 samples
Epoch 1/1
Q 913*142 T 129646 [91m☒[0m 129446
Q 899*412 T 370388 [91m☒[0m 370188
Q 534*367 T 195978 [91m☒[0m 196178
Q 825*735 T 606375 [91m☒[0m 606875
Q 843*717 T 604431 [92m☑[0m 604431
Q 689*582 T 400998 [92m☑[0m 400998
Q 422*385 T 162470 [91m☒[0m 162070
Q 846*732 T 619272 [91m☒[0m 618872
Q 888*404 T 358752 [91m☒[0m 359152
Q 608*366 T 222528 [91m☒[0m 222128

--------------------------------------------------
Iteration 67
Train on 270000 samples, validate on 30000 samples
Epoch 1/1
Q 626*47

Q 496*321 T 159216 [91m☒[0m 159616
Q 632*289 T 182648 [91m☒[0m 182248
Q 153*42  T 6426   [92m☑[0m 6426  
Q 988*976 T 964288 [91m☒[0m 964488
Q 443*412 T 182516 [91m☒[0m 182116
Q 543*268 T 145524 [91m☒[0m 144924
Q 996*884 T 880464 [92m☑[0m 880464
Q 244*41  T 10004  [91m☒[0m 9864  
Q 891*548 T 488268 [92m☑[0m 488268
Q 582*243 T 141426 [91m☒[0m 141226

--------------------------------------------------
Iteration 92
Train on 270000 samples, validate on 30000 samples
Epoch 1/1
Q 973*187 T 181951 [91m☒[0m 182751
Q 794*565 T 448610 [91m☒[0m 449610
Q 541*322 T 174202 [91m☒[0m 174002
Q 940*870 T 817800 [92m☑[0m 817800
Q 147*144 T 21168  [91m☒[0m 20368 
Q 878*425 T 373150 [92m☑[0m 373150
Q 508*108 T 54864  [91m☒[0m 55864 
Q 741*110 T 81510  [92m☑[0m 81510 
Q 577*53  T 30581  [92m☑[0m 30581 
Q 823*241 T 198343 [91m☒[0m 198443

--------------------------------------------------
Iteration 93
Train on 270000 samples, validate on 30000 samples
Epoch 1/1
Q 800*24

# Testing

In [14]:
print("MSG : Prediction")
#####################################################
## Evaluate the trained model on the held-out test set:
## print the first ten predictions, then the exact-match accuracy.
#####################################################

# Predicted character index for every answer position of every test sample.
pre_y = model.predict(test_x)
pre_y = np.argmax(pre_y,axis = 2)

# A sample counts as correct only if every answer position matches.  Comparing
# indices is equivalent to comparing decoded strings (the index -> char mapping
# is one-to-one) and avoids decoding the whole test set in Python.
true_y = np.argmax(test_y, axis=2)
acc = float(np.sum(np.all(pre_y == true_y, axis=1)))

# Only the samples that are actually displayed need to be decoded.
for i in range(min(10, test_x.shape[0])):
    q = ctable.decode(test_x[i])
    correct = ctable.decode(test_y[i])
    guess = ctable.decode(pre_y[i], calc_argmax=False)
    print('Q', q[::-1] if REVERSE else q, end=' ')
    print('T', correct, end=' ')
    if correct == guess:
        print(colors.ok + '☑' + colors.close, end=' ')
    else:
        print(colors.fail + '☒' + colors.close, end=' ')
    print(guess)
print('accuracy = ',int(acc), "/", test_x.shape[0], "   ", (acc*100.0/test_x.shape[0]) )

MSG : Prediction
Q 982*570 T 559740 [92m☑[0m 559740
Q 660*90  T 59400  [92m☑[0m 59400 
Q 780*296 T 230880 [92m☑[0m 230880
Q 740*51  T 37740  [92m☑[0m 37740 
Q 608*544 T 330752 [91m☒[0m 331752
Q 568*489 T 277752 [91m☒[0m 278152
Q 836*330 T 275880 [92m☑[0m 275880
Q 678*282 T 191196 [91m☒[0m 190596
Q 838*667 T 558946 [91m☒[0m 558546
Q 768*629 T 483072 [91m☒[0m 483672
accuracy =  37107 / 100000     37.107
