In [1]:
from keras.models import Sequential
from keras import layers
import numpy as np
from six.moves import range

Using TensorFlow backend.


# Parameters Config

In [2]:
class colors:
    """ANSI terminal escape sequences used to colour the per-sample result marks."""

    # Reset sequence: restores the terminal's default rendering.
    close = '\033[0m'

    # Sequence emitted before a correct (ok) mark and before a wrong (fail) mark.
    ok = '\033[92m'
    fail = '\033[91m'

In [3]:
# Notebook-wide configuration. Later cells read these names as globals.

# When True, DataGenerate stores each question string reversed, and the
# training cells reverse it back before printing.
REVERSE = True
# NOTE(review): BuildModel assigns its own local RNN / HIDDEN_SIZE / LAYERS,
# so these notebook-level values are not what the visible model code uses.
RNN = layers.LSTM
HIDDEN_SIZE = 128
# Passed as batch_size to model.fit in the training cells.
BATCH_SIZE = 128

### Independent Variables
# Number of unique questions DataGenerate produces.
DATA_SIZE = 30000
# Maximum number of digits per operand.
DIGITS = 3
# Question width: two operands of up to DIGITS digits plus one operator character.
MAXLEN = DIGITS + 1 + DIGITS
# Answer width: one position more than DIGITS.
ansmaxlen = DIGITS + 1
# Alphabet handed to CharacterTable; the space is the padding character.
chars = '0123456789+ '
LAYERS = 1

In [4]:
class CharacterTable(object):
    """Two-way lookup between characters and one-hot rows.

    The alphabet is the sorted set of the characters given at construction;
    a character's position in that sorted list is its class index.
    """

    def __init__(self, chars):
        """Build the sorted alphabet and both lookup dictionaries."""
        self.chars = sorted(set(chars))
        self.char_indices = {}
        self.indices_char = {}
        for position, symbol in enumerate(self.chars):
            self.char_indices[symbol] = position
            self.indices_char[position] = symbol

    def encode(self, C, num_rows):
        """Return a (num_rows, alphabet size) array with one 1 per character of C.

        Rows beyond len(C) stay all-zero.
        """
        one_hot = np.zeros((num_rows, len(self.chars)))
        for row, symbol in enumerate(C):
            column = self.char_indices[symbol]
            one_hot[row, column] = 1
        return one_hot

    def decode(self, x, calc_argmax=True):
        """Turn rows of scores (or, with calc_argmax=False, class indices) into a string."""
        indices = x.argmax(axis=-1) if calc_argmax else x
        return "".join(self.indices_char[index] for index in indices)

In [5]:
# Build the character table for the current alphabet and show the sorted
# alphabet plus both lookup dictionaries.
ctable = CharacterTable(chars)
print(ctable.chars)
print(ctable.char_indices)
print(ctable.indices_char)

[' ', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
{' ': 0, '+': 1, '0': 2, '1': 3, '2': 4, '3': 5, '4': 6, '5': 7, '6': 8, '7': 9, '8': 10, '9': 11}
{0: ' ', 1: '+', 2: '0', 3: '1', 4: '2', 5: '3', 6: '4', 7: '5', 8: '6', 9: '7', 10: '8', 11: '9'}


# Data Generation

In [6]:
def DataGenerate(operator='+-', maxlen=7, digits=3, ansmaxlen=4,
                 data_size=None, reverse=None):
    """Generate unique random arithmetic questions with space-padded answers.

    Each question is ``'<a><op><b>'`` with ``a >= b``, right-padded with
    spaces to ``maxlen``. The answer is right-padded to ``ansmaxlen``.

    Args:
        operator: String of candidate operator characters; one is drawn per
            question. '-' subtracts, '+' adds, any other character multiplies.
        maxlen: Width every question string is padded to.
        digits: Maximum number of digits drawn for each operand.
        ansmaxlen: Width every answer string is padded to.
        data_size: Number of unique questions to produce. Defaults to the
            notebook-level ``DATA_SIZE``.
        reverse: Whether the returned questions are reversed. Defaults to the
            notebook-level ``REVERSE``.

    Returns:
        ``(questions, expected, shows)``: the (possibly reversed) question
        strings, the padded answers, and the un-reversed question strings.

    Note:
        The loop only ends once ``data_size`` distinct (a, b, operator)
        triples were drawn, so ``data_size`` must not exceed the number of
        distinct triples available for the given ``digits`` and ``operator``.
    """
    if data_size is None:
        data_size = DATA_SIZE
    if reverse is None:
        reverse = REVERSE

    digit_pool = list('0123456789')
    operator_pool = list(operator)

    def random_operand():
        # The operand length is drawn first, then each digit; int() drops
        # any leading zeros.
        return int(''.join(np.random.choice(digit_pool)
                           for _ in range(np.random.randint(1, digits + 1))))

    def random_operator():
        return str(np.random.choice(operator_pool))

    questions = []
    shows = []
    expected = []
    seen = set()
    print('Generating data...')
    while len(questions) < data_size:
        a, b, c = random_operand(), random_operand(), random_operator()
        # Keep the larger operand first so a subtraction never goes negative.
        a, b = max(a, b), min(a, b)
        key = (a, b, c)
        if key in seen:
            continue
        seen.add(key)

        q = '{}{}{}'.format(a, c, b)
        # Always define the display form; previously it was only assigned when
        # reversing, which raised UnboundLocalError with reversal disabled.
        show = q + ' ' * (maxlen - len(q))
        query = show[::-1] if reverse else show

        if c == '-':
            ans = str(a - b)
        elif c == '+':
            ans = str(a + b)
        else:
            ans = str(a * b)
        ans += ' ' * (ansmaxlen - len(ans))

        questions.append(query)
        shows.append(show)
        expected.append(ans)
    # Message text is kept unchanged regardless of the operator in use.
    print('Total addition questions:', len(questions))
    return questions, expected, shows

In [7]:
# Generate the addition-only dataset with the notebook-level sizes, then
# preview the first five display-form questions with their padded answers.
questions, expected, shows = DataGenerate('+', maxlen=MAXLEN, digits=DIGITS, ansmaxlen=ansmaxlen)
print(shows[:5], expected[:5])

Generating data...
Total addition questions: 30000
['78+2   ', '769+78 ', '87+43  ', '408+318', '360+97 '] ['80  ', '847 ', '130 ', '726 ', '457 ']


# Processing

In [8]:
def Vectorization(questions, expected, ctable, maxlen=7, ansmaxlen=4):
    """One-hot encode question and answer strings into boolean arrays.

    Args:
        questions: Sequence of question strings, each at most ``maxlen`` long.
        expected: Sequence of answer strings, each at most ``ansmaxlen`` long.
        ctable: Character table providing ``chars`` (the alphabet) and
            ``encode(string, num_rows)``.
        maxlen: Number of rows per encoded question.
        ansmaxlen: Number of rows per encoded answer.

    Returns:
        ``(x, y)`` with shapes ``(len(questions), maxlen, V)`` and
        ``(len(expected), ansmaxlen, V)``, where ``V = len(ctable.chars)``.
    """
    print('Vectorization...')
    # Take the one-hot width from the table that does the encoding rather than
    # from a notebook global, so the two can never disagree.
    vocab_size = len(ctable.chars)
    # Builtin bool replaces the np.bool alias, which newer NumPy removed.
    x = np.zeros((len(questions), maxlen, vocab_size), dtype=bool)
    y = np.zeros((len(expected), ansmaxlen, vocab_size), dtype=bool)
    for i, sentence in enumerate(questions):
        x[i] = ctable.encode(sentence, maxlen)
    for i, sentence in enumerate(expected):
        y[i] = ctable.encode(sentence, ansmaxlen)
    return x, y
# Encode the generated dataset; DIGITS+1 equals the notebook-level ansmaxlen here.
x, y = Vectorization(questions, expected, ctable, maxlen=MAXLEN, ansmaxlen=DIGITS+1)

Vectorization...


In [9]:
def shuffle(x, y, ratio=0.25):
    """Shuffle x and y together and split them into train / validation / test.

    The first ``int(len(y) * ratio)`` shuffled samples become the test set.
    Of the remainder, the last tenth (integer division) becomes the
    validation set and the rest the training set.

    Returns:
        ``(x_train, y_train, x_val, y_val, test_x, test_y)``.
    """
    # One shared permutation keeps every input aligned with its label.
    order = np.arange(len(y))
    np.random.shuffle(order)
    x_mixed, y_mixed = x[order], y[order]

    # Test portion comes off the front of the shuffled data.
    n_test = int(len(y) * ratio)
    test_x, train_x = x_mixed[:n_test], x_mixed[n_test:]
    test_y, train_y = y_mixed[:n_test], y_mixed[n_test:]

    # Validation portion is the tail of what is left.
    n_fit = len(train_x) - len(train_x) // 10
    x_train, x_val = train_x[:n_fit], train_x[n_fit:]
    y_train, y_val = train_y[:n_fit], train_y[n_fit:]
    return x_train, y_train, x_val, y_val, test_x, test_y

# Split the encoded data with the default 25% test ratio, then report the
# shape of every partition.
x_train, y_train, x_val, y_val, test_x, test_y = shuffle(x,y)
print('Training Data:')
print(x_train.shape)
print(y_train.shape)

print('Validation Data:')
print(x_val.shape)
print(y_val.shape)

print('Testing Data:')
print(test_x.shape)
print(test_y.shape)

# Show the raw one-hot rows of the first three training samples.
print("input: ", x_train[:3], '\n\n', "label: ", y_train[:3])

Training Data:
(20250, 7, 12)
(20250, 4, 12)
Validation Data:
(2250, 7, 12)
(2250, 4, 12)
Testing Data:
(7500, 7, 12)
(7500, 4, 12)
input:  [[[False False False False False False False False False False False
    True]
  [False False False False False False  True False False False False
   False]
  [False False False False False False False False  True False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False False False False False False  True False
   False]
  [False False False  True False False False False False False False
   False]
  [False False False False False False False False False  True False
   False]]

 [[ True False False False False False False False False False False
   False]
  [False False False False False False False False False  True False
   False]
  [False False False False False False False False False False False
    True]
  [False  True False False False False False False False False False

# Build Model

In [10]:
def BuildModel(maxlen=7, ansmaxlen=4):
    """Build and compile the sequence-to-sequence LSTM model.

    Layers, in order: an LSTM encoder producing one vector of
    ``128 * ansmaxlen`` values, a Reshape of that vector into ``ansmaxlen``
    steps of 128 values, one LSTM returning the full sequence, and a softmax
    Dense layer with one unit per character.

    Reads the notebook-level ``chars`` for the alphabet size.

    Args:
        maxlen: Number of time steps in each encoded question.
        ansmaxlen: Number of time steps in each encoded answer.

    Returns:
        The compiled Sequential model; its summary is printed as a side effect.
    """
    print('Build model...')

    RNN = layers.LSTM
    # Values per answer position. The encoder output must hold exactly
    # ansmaxlen * STEP_SIZE values for the Reshape below to be valid, so both
    # are derived from this single constant.
    STEP_SIZE = 128
    HIDDEN_SIZE = STEP_SIZE * ansmaxlen
    LAYERS = 1

    model = Sequential()
    model.add(RNN(HIDDEN_SIZE, input_shape=(maxlen, len(chars))))
    model.add(layers.Reshape((ansmaxlen, STEP_SIZE)))
    for _ in range(LAYERS):
        model.add(RNN(HIDDEN_SIZE, return_sequences=True))
    model.add(layers.Dense(len(chars), activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    model.build()
    model.summary()
    return model

# Training part

In [11]:
# Train for 100 single-epoch rounds; after each round, spot-check ten random
# validation samples and print question, truth, a pass/fail mark and the guess.
model = BuildModel(maxlen=MAXLEN, ansmaxlen=ansmaxlen)
for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train, verbose=1,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Index with a one-element array so the batch dimension is kept.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # NOTE(review): predict_classes is not available in newer Keras/TF
        # releases — confirm the pinned version before re-running.
        preds = model.predict_classes(rowx, verbose=0)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        # preds already holds class indices, so no argmax is needed.
        guess = ctable.decode(preds[0], calc_argmax=False)
        # Questions were stored reversed when REVERSE is set; undo that for display.
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 512)               1075200   
_________________________________________________________________
reshape_1 (Reshape)          (None, 4, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 512)            1312768   
_________________________________________________________________
dense_1 (Dense)              (None, 4, 12)             6156      
Total params: 2,394,124
Trainable params: 2,394,124
Non-trainable params: 0
_________________________________________________________________

--------------------------------------------------
Iteration 0
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 267+13  T 280  [91m☒[0m 109 
Q 77+12   T 89   [91m☒[0m 79  
Q 80+77   T 157  [91m☒[0m 109 
Q 951+34  T 985  [91m☒[0m

Q 480+11  T 491  [92m☑[0m 491 
Q 328+60  T 388  [92m☑[0m 388 
Q 613+450 T 1063 [92m☑[0m 1063
Q 262+37  T 299  [91m☒[0m 399 
Q 350+2   T 352  [92m☑[0m 352 
Q 684+1   T 685  [92m☑[0m 685 
Q 377+34  T 411  [92m☑[0m 411 
Q 262+37  T 299  [91m☒[0m 399 
Q 910+68  T 978  [92m☑[0m 978 
Q 154+3   T 157  [92m☑[0m 157 

--------------------------------------------------
Iteration 14
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 675+597 T 1272 [92m☑[0m 1272
Q 691+155 T 846  [92m☑[0m 846 
Q 36+0    T 36   [92m☑[0m 36  
Q 675+58  T 733  [92m☑[0m 733 
Q 823+2   T 825  [92m☑[0m 825 
Q 916+2   T 918  [92m☑[0m 918 
Q 664+95  T 759  [92m☑[0m 759 
Q 172+59  T 231  [92m☑[0m 231 
Q 262+15  T 277  [92m☑[0m 277 
Q 860+38  T 898  [92m☑[0m 898 

--------------------------------------------------
Iteration 15
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 224+64  T 288  [92m☑[0m 288 
Q 878+84  T 962  [92m☑[0m 962 
Q 633+150 T 783  [92m☑[0

Q 40+27   T 67   [92m☑[0m 67  
Q 175+64  T 239  [92m☑[0m 239 
Q 224+46  T 270  [92m☑[0m 270 
Q 603+313 T 916  [92m☑[0m 916 
Q 552+76  T 628  [92m☑[0m 628 
Q 89+36   T 125  [92m☑[0m 125 
Q 62+16   T 78   [92m☑[0m 78  
Q 592+71  T 663  [92m☑[0m 663 
Q 877+135 T 1012 [92m☑[0m 1012
Q 492+1   T 493  [92m☑[0m 493 

--------------------------------------------------
Iteration 28
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 591+8   T 599  [92m☑[0m 599 
Q 865+407 T 1272 [92m☑[0m 1272
Q 30+23   T 53   [92m☑[0m 53  
Q 90+17   T 107  [92m☑[0m 107 
Q 65+27   T 92   [92m☑[0m 92  
Q 101+65  T 166  [92m☑[0m 166 
Q 520+81  T 601  [92m☑[0m 601 
Q 929+40  T 969  [92m☑[0m 969 
Q 945+5   T 950  [92m☑[0m 950 
Q 398+2   T 400  [92m☑[0m 400 

--------------------------------------------------
Iteration 29
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 136+24  T 160  [92m☑[0m 160 
Q 907+60  T 967  [92m☑[0m 967 
Q 136+24  T 160  [92m☑[0

Q 94+8    T 102  [92m☑[0m 102 
Q 859+204 T 1063 [92m☑[0m 1063
Q 10+8    T 18   [92m☑[0m 18  
Q 470+97  T 567  [92m☑[0m 567 
Q 159+97  T 256  [92m☑[0m 256 
Q 885+23  T 908  [92m☑[0m 908 
Q 230+1   T 231  [92m☑[0m 231 
Q 951+90  T 1041 [92m☑[0m 1041
Q 69+25   T 94   [92m☑[0m 94  
Q 93+84   T 177  [92m☑[0m 177 

--------------------------------------------------
Iteration 42
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 860+243 T 1103 [92m☑[0m 1103
Q 435+380 T 815  [92m☑[0m 815 
Q 954+94  T 1048 [92m☑[0m 1048
Q 27+12   T 39   [92m☑[0m 39  
Q 77+61   T 138  [92m☑[0m 138 
Q 402+206 T 608  [92m☑[0m 608 
Q 957+5   T 962  [92m☑[0m 962 
Q 84+60   T 144  [92m☑[0m 144 
Q 442+87  T 529  [92m☑[0m 529 
Q 800+69  T 869  [92m☑[0m 869 

--------------------------------------------------
Iteration 43
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 81+47   T 128  [92m☑[0m 128 
Q 583+11  T 594  [92m☑[0m 594 
Q 127+94  T 221  [92m☑[0

Q 790+8   T 798  [92m☑[0m 798 
Q 687+340 T 1027 [92m☑[0m 1027
Q 65+7    T 72   [92m☑[0m 72  
Q 409+9   T 418  [92m☑[0m 418 
Q 53+2    T 55   [92m☑[0m 55  
Q 927+18  T 945  [92m☑[0m 945 
Q 383+42  T 425  [92m☑[0m 425 
Q 356+15  T 371  [92m☑[0m 371 
Q 45+39   T 84   [92m☑[0m 84  
Q 63+46   T 109  [92m☑[0m 109 

--------------------------------------------------
Iteration 56
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 900+3   T 903  [92m☑[0m 903 
Q 338+7   T 345  [92m☑[0m 345 
Q 284+9   T 293  [92m☑[0m 293 
Q 486+18  T 504  [92m☑[0m 504 
Q 829+47  T 876  [92m☑[0m 876 
Q 932+555 T 1487 [92m☑[0m 1487
Q 595+72  T 667  [92m☑[0m 667 
Q 355+337 T 692  [92m☑[0m 692 
Q 687+240 T 927  [92m☑[0m 927 
Q 842+6   T 848  [92m☑[0m 848 

--------------------------------------------------
Iteration 57
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 859+6   T 865  [92m☑[0m 865 
Q 735+41  T 776  [92m☑[0m 776 
Q 981+5   T 986  [92m☑[0

Q 990+84  T 1074 [92m☑[0m 1074
Q 91+72   T 163  [92m☑[0m 163 
Q 454+394 T 848  [92m☑[0m 848 
Q 511+369 T 880  [92m☑[0m 880 
Q 466+34  T 500  [92m☑[0m 500 
Q 420+82  T 502  [92m☑[0m 502 
Q 314+18  T 332  [92m☑[0m 332 
Q 591+8   T 599  [92m☑[0m 599 
Q 499+174 T 673  [92m☑[0m 673 
Q 13+8    T 21   [92m☑[0m 21  

--------------------------------------------------
Iteration 70
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 668+44  T 712  [92m☑[0m 712 
Q 204+120 T 324  [92m☑[0m 324 
Q 845+89  T 934  [92m☑[0m 934 
Q 898+5   T 903  [92m☑[0m 903 
Q 27+12   T 39   [92m☑[0m 39  
Q 58+20   T 78   [92m☑[0m 78  
Q 367+29  T 396  [92m☑[0m 396 
Q 87+47   T 134  [92m☑[0m 134 
Q 933+820 T 1753 [92m☑[0m 1753
Q 948+0   T 948  [92m☑[0m 948 

--------------------------------------------------
Iteration 71
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 730+379 T 1109 [92m☑[0m 1109
Q 730+81  T 811  [92m☑[0m 811 
Q 936+281 T 1217 [92m☑[0

Q 853+8   T 861  [92m☑[0m 861 
Q 409+9   T 418  [92m☑[0m 418 
Q 802+11  T 813  [92m☑[0m 813 
Q 966+26  T 992  [92m☑[0m 992 
Q 95+19   T 114  [92m☑[0m 114 
Q 225+107 T 332  [92m☑[0m 332 
Q 742+77  T 819  [92m☑[0m 819 
Q 528+454 T 982  [92m☑[0m 982 
Q 859+34  T 893  [92m☑[0m 893 
Q 570+507 T 1077 [92m☑[0m 1077

--------------------------------------------------
Iteration 84
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 904+663 T 1567 [92m☑[0m 1567
Q 262+15  T 277  [92m☑[0m 277 
Q 629+329 T 958  [92m☑[0m 958 
Q 648+344 T 992  [92m☑[0m 992 
Q 752+269 T 1021 [92m☑[0m 1021
Q 777+43  T 820  [92m☑[0m 820 
Q 362+47  T 409  [92m☑[0m 409 
Q 386+37  T 423  [92m☑[0m 423 
Q 482+75  T 557  [92m☑[0m 557 
Q 922+131 T 1053 [92m☑[0m 1053

--------------------------------------------------
Iteration 85
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 48+27   T 75   [92m☑[0m 75  
Q 893+261 T 1154 [92m☑[0m 1154
Q 766+8   T 774  [92m☑[0

Q 478+27  T 505  [92m☑[0m 505 
Q 369+1   T 370  [92m☑[0m 370 
Q 979+356 T 1335 [92m☑[0m 1335
Q 981+16  T 997  [92m☑[0m 997 
Q 320+1   T 321  [92m☑[0m 321 
Q 459+8   T 467  [92m☑[0m 467 
Q 77+52   T 129  [92m☑[0m 129 
Q 287+117 T 404  [92m☑[0m 404 
Q 753+5   T 758  [92m☑[0m 758 
Q 383+28  T 411  [92m☑[0m 411 

--------------------------------------------------
Iteration 98
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 938+847 T 1785 [92m☑[0m 1785
Q 307+57  T 364  [92m☑[0m 364 
Q 900+3   T 903  [92m☑[0m 903 
Q 494+8   T 502  [92m☑[0m 502 
Q 576+94  T 670  [92m☑[0m 670 
Q 586+30  T 616  [92m☑[0m 616 
Q 684+7   T 691  [92m☑[0m 691 
Q 438+2   T 440  [92m☑[0m 440 
Q 607+98  T 705  [92m☑[0m 705 
Q 504+99  T 603  [92m☑[0m 603 

--------------------------------------------------
Iteration 99
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 287+57  T 344  [92m☑[0m 344 
Q 545+55  T 600  [92m☑[0m 600 
Q 827+801 T 1628 [92m☑[0

### Analysis
* Only addition
* 3 digits
* Training size: 20.25K
* Total training epochs: 100
* First reached 99% validation accuracy at epoch 14

In [12]:
# Experiment: addition with operands of up to 4 digits and a 60000-question
# dataset. These assignments overwrite the notebook-level globals that
# DataGenerate, Vectorization and BuildModel read.
DIGITS = 4
MAXLEN = DIGITS + 1 + DIGITS
# A sum of two DIGITS-digit numbers can need one extra digit.
ansmaxlen = DIGITS + 1
chars = '0123456789+ '
DATA_SIZE = 60000

# Rebuild the table, dataset, encodings and splits for the new settings.
ctable = CharacterTable(chars)
questions, expected, shows = DataGenerate('+', maxlen=MAXLEN, digits=DIGITS, ansmaxlen=ansmaxlen)
x, y = Vectorization(questions, expected, ctable, maxlen=MAXLEN, ansmaxlen=ansmaxlen)
x_train, y_train, x_val, y_val, test_x, test_y = shuffle(x,y)

# Train for 100 single-epoch rounds; after each round, spot-check ten random
# validation samples.
model = BuildModel(maxlen=MAXLEN, ansmaxlen=ansmaxlen)
for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train, verbose=1,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Index with a one-element array so the batch dimension is kept.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # NOTE(review): predict_classes is not available in newer Keras/TF
        # releases — confirm the pinned version before re-running.
        preds = model.predict_classes(rowx, verbose=0)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        # preds already holds class indices, so no argmax is needed.
        guess = ctable.decode(preds[0], calc_argmax=False)
        # Questions were stored reversed when REVERSE is set; undo that for display.
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)

Generating data...
Total addition questions: 60000
Vectorization...
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 640)               1671680   
_________________________________________________________________
reshape_2 (Reshape)          (None, 5, 128)            0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 5, 640)            1968640   
_________________________________________________________________
dense_2 (Dense)              (None, 5, 12)             7692      
Total params: 3,648,012
Trainable params: 3,648,012
Non-trainable params: 0
_________________________________________________________________

--------------------------------------------------
Iteration 0
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 111+71    T 182   [91m☒[0m 112  
Q 89+38     T 127   

Q 240+50    T 290   [92m☑[0m 290  
Q 8520+314  T 8834  [92m☑[0m 8834 
Q 583+3     T 586   [92m☑[0m 586  
Q 2520+6    T 2526  [92m☑[0m 2526 
Q 5552+7    T 5559  [92m☑[0m 5559 
Q 4451+258  T 4709  [92m☑[0m 4709 
Q 3622+0    T 3622  [92m☑[0m 3622 
Q 4288+457  T 4745  [92m☑[0m 4745 
Q 179+43    T 222   [92m☑[0m 222  
Q 745+62    T 807   [92m☑[0m 807  

--------------------------------------------------
Iteration 13
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 6400+536  T 6936  [92m☑[0m 6936 
Q 654+0     T 654   [92m☑[0m 654  
Q 773+63    T 836   [92m☑[0m 836  
Q 260+14    T 274   [92m☑[0m 274  
Q 4925+40   T 4965  [92m☑[0m 4965 
Q 3079+807  T 3886  [92m☑[0m 3886 
Q 630+559   T 1189  [92m☑[0m 1189 
Q 9617+47   T 9664  [92m☑[0m 9664 
Q 4710+6    T 4716  [92m☑[0m 4716 
Q 7482+92   T 7574  [92m☑[0m 7574 

--------------------------------------------------
Iteration 14
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 7010+6    

Q 342+122   T 464   [92m☑[0m 464  
Q 4100+2399 T 6499  [92m☑[0m 6499 
Q 432+6     T 438   [92m☑[0m 438  
Q 85+71     T 156   [92m☑[0m 156  
Q 8977+9    T 8986  [92m☑[0m 8986 
Q 645+235   T 880   [92m☑[0m 880  
Q 5852+4206 T 10058 [92m☑[0m 10058
Q 719+85    T 804   [92m☑[0m 804  
Q 532+499   T 1031  [92m☑[0m 1031 
Q 118+4     T 122   [92m☑[0m 122  

--------------------------------------------------
Iteration 26
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 2784+6    T 2790  [92m☑[0m 2790 
Q 5633+248  T 5881  [92m☑[0m 5881 
Q 147+41    T 188   [92m☑[0m 188  
Q 8890+76   T 8966  [92m☑[0m 8966 
Q 610+429   T 1039  [92m☑[0m 1039 
Q 5567+59   T 5626  [92m☑[0m 5626 
Q 4837+9    T 4846  [92m☑[0m 4846 
Q 8426+7270 T 15696 [91m☒[0m 15697
Q 9844+487  T 10331 [92m☑[0m 10331
Q 4685+79   T 4764  [92m☑[0m 4764 

--------------------------------------------------
Iteration 27
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 309+83    

Q 112+8     T 120   [92m☑[0m 120  
Q 241+170   T 411   [92m☑[0m 411  
Q 9320+49   T 9369  [92m☑[0m 9369 
Q 591+18    T 609   [92m☑[0m 609  
Q 806+160   T 966   [92m☑[0m 966  
Q 5872+24   T 5896  [92m☑[0m 5896 
Q 7692+377  T 8069  [92m☑[0m 8069 
Q 707+1     T 708   [92m☑[0m 708  
Q 6814+9    T 6823  [92m☑[0m 6823 
Q 29+20     T 49    [92m☑[0m 49   

--------------------------------------------------
Iteration 40
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 26+15     T 41    [92m☑[0m 41   
Q 352+341   T 693   [92m☑[0m 693  
Q 3819+8    T 3827  [92m☑[0m 3827 
Q 2517+2062 T 4579  [92m☑[0m 4579 
Q 881+736   T 1617  [92m☑[0m 1617 
Q 6165+391  T 6556  [92m☑[0m 6556 
Q 42+4      T 46    [92m☑[0m 46   
Q 3934+694  T 4628  [92m☑[0m 4628 
Q 620+57    T 677   [92m☑[0m 677  
Q 277+25    T 302   [92m☑[0m 302  

--------------------------------------------------
Iteration 41
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 2227+41   

Q 3544+0    T 3544  [92m☑[0m 3544 
Q 354+82    T 436   [92m☑[0m 436  
Q 2187+982  T 3169  [91m☒[0m 3179 
Q 8341+8    T 8349  [92m☑[0m 8349 
Q 846+4     T 850   [92m☑[0m 850  
Q 6156+79   T 6235  [92m☑[0m 6235 
Q 466+278   T 744   [92m☑[0m 744  
Q 778+215   T 993   [92m☑[0m 993  
Q 584+20    T 604   [92m☑[0m 604  
Q 8176+202  T 8378  [92m☑[0m 8378 

--------------------------------------------------
Iteration 53
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 79+33     T 112   [92m☑[0m 112  
Q 6498+74   T 6572  [92m☑[0m 6572 
Q 728+34    T 762   [92m☑[0m 762  
Q 3858+4    T 3862  [92m☑[0m 3862 
Q 46+15     T 61    [92m☑[0m 61   
Q 972+14    T 986   [92m☑[0m 986  
Q 724+92    T 816   [92m☑[0m 816  
Q 3625+233  T 3858  [92m☑[0m 3858 
Q 9639+965  T 10604 [92m☑[0m 10604
Q 2352+1100 T 3452  [92m☑[0m 3452 

--------------------------------------------------
Iteration 54
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 2894+2    

Q 7860+4    T 7864  [92m☑[0m 7864 
Q 5484+19   T 5503  [92m☑[0m 5503 
Q 2627+2    T 2629  [92m☑[0m 2629 
Q 933+656   T 1589  [92m☑[0m 1589 
Q 304+3     T 307   [92m☑[0m 307  
Q 194+65    T 259   [92m☑[0m 259  
Q 9134+480  T 9614  [92m☑[0m 9614 
Q 4189+520  T 4709  [92m☑[0m 4709 
Q 3243+85   T 3328  [92m☑[0m 3328 
Q 8876+175  T 9051  [92m☑[0m 9051 

--------------------------------------------------
Iteration 67
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 4420+33   T 4453  [92m☑[0m 4453 
Q 1803+637  T 2440  [92m☑[0m 2440 
Q 552+499   T 1051  [92m☑[0m 1051 
Q 6939+4612 T 11551 [91m☒[0m 11541
Q 6473+0    T 6473  [92m☑[0m 6473 
Q 8623+592  T 9215  [92m☑[0m 9215 
Q 78+1      T 79    [92m☑[0m 79   
Q 4817+7    T 4824  [92m☑[0m 4824 
Q 5906+2650 T 8556  [92m☑[0m 8556 
Q 962+7     T 969   [92m☑[0m 969  

--------------------------------------------------
Iteration 68
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 4627+44   

Q 319+1     T 320   [92m☑[0m 320  
Q 1573+1036 T 2609  [92m☑[0m 2609 
Q 4008+4    T 4012  [92m☑[0m 4012 
Q 91+40     T 131   [92m☑[0m 131  
Q 2954+195  T 3149  [92m☑[0m 3149 
Q 893+5     T 898   [92m☑[0m 898  
Q 249+8     T 257   [92m☑[0m 257  
Q 519+357   T 876   [92m☑[0m 876  
Q 783+40    T 823   [92m☑[0m 823  
Q 274+6     T 280   [92m☑[0m 280  

--------------------------------------------------
Iteration 80
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 9876+5    T 9881  [92m☑[0m 9881 
Q 1549+725  T 2274  [92m☑[0m 2274 
Q 2577+21   T 2598  [92m☑[0m 2598 
Q 683+24    T 707   [92m☑[0m 707  
Q 459+44    T 503   [92m☑[0m 503  
Q 7917+13   T 7930  [92m☑[0m 7930 
Q 589+8     T 597   [92m☑[0m 597  
Q 2084+39   T 2123  [92m☑[0m 2123 
Q 1465+659  T 2124  [92m☑[0m 2124 
Q 6205+404  T 6609  [92m☑[0m 6609 

--------------------------------------------------
Iteration 81
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 4420+33   

Q 692+5     T 697   [92m☑[0m 697  
Q 701+68    T 769   [92m☑[0m 769  
Q 6891+710  T 7601  [92m☑[0m 7601 
Q 22+6      T 28    [92m☑[0m 28   
Q 6360+64   T 6424  [92m☑[0m 6424 
Q 1579+2    T 1581  [92m☑[0m 1581 
Q 94+24     T 118   [92m☑[0m 118  
Q 908+85    T 993   [92m☑[0m 993  
Q 4797+0    T 4797  [92m☑[0m 4797 
Q 930+535   T 1465  [92m☑[0m 1465 

--------------------------------------------------
Iteration 93
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 8473+0    T 8473  [92m☑[0m 8473 
Q 375+9     T 384   [92m☑[0m 384  
Q 2938+7    T 2945  [92m☑[0m 2945 
Q 9028+37   T 9065  [92m☑[0m 9065 
Q 1787+327  T 2114  [92m☑[0m 2114 
Q 310+7     T 317   [92m☑[0m 317  
Q 752+475   T 1227  [92m☑[0m 1227 
Q 9970+33   T 10003 [92m☑[0m 10003
Q 6404+0    T 6404  [92m☑[0m 6404 
Q 474+122   T 596   [92m☑[0m 596  

--------------------------------------------------
Iteration 94
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 2471+24   

### Analysis
* Only addition
* 4 digits
* Training size: 40.5K
* Total training epochs: 100
* First reached 99% validation accuracy at epoch 13

In [13]:
# Experiment: subtraction with operands of up to 3 digits and a
# 30000-question dataset. These assignments overwrite the notebook-level
# globals that DataGenerate, Vectorization and BuildModel read.
DIGITS = 3
MAXLEN = DIGITS + 1 + DIGITS
# DataGenerate puts the larger operand first, so a difference never needs
# more than DIGITS characters.
ansmaxlen = DIGITS
chars = '0123456789- '
DATA_SIZE = 30000

# Rebuild the table, dataset, encodings and splits for the new settings.
ctable = CharacterTable(chars)
questions, expected, shows = DataGenerate('-', maxlen=MAXLEN, digits=DIGITS, ansmaxlen=ansmaxlen)
x, y = Vectorization(questions, expected, ctable, maxlen=MAXLEN, ansmaxlen=ansmaxlen)
x_train, y_train, x_val, y_val, test_x, test_y = shuffle(x,y)

# Train for 100 single-epoch rounds; after each round, spot-check ten random
# validation samples.
model = BuildModel(maxlen=MAXLEN, ansmaxlen=ansmaxlen)
for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train, verbose=1,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Index with a one-element array so the batch dimension is kept.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # NOTE(review): predict_classes is not available in newer Keras/TF
        # releases — confirm the pinned version before re-running.
        preds = model.predict_classes(rowx, verbose=0)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        # preds already holds class indices, so no argmax is needed.
        guess = ctable.decode(preds[0], calc_argmax=False)
        # Questions were stored reversed when REVERSE is set; undo that for display.
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)

Generating data...
Total addition questions: 30000
Vectorization...
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 384)               609792    
_________________________________________________________________
reshape_3 (Reshape)          (None, 3, 128)            0         
_________________________________________________________________
lstm_6 (LSTM)                (None, 3, 384)            787968    
_________________________________________________________________
dense_3 (Dense)              (None, 3, 12)             4620      
Total params: 1,402,380
Trainable params: 1,402,380
Non-trainable params: 0
_________________________________________________________________

--------------------------------------------------
Iteration 0
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 823-29  T 794 [91m☒[0m 22 
Q 684-79  T 605 [91m☒[0m 

Q 812-54  T 758 [92m☑[0m 758
Q 984-83  T 901 [92m☑[0m 901
Q 810-26  T 784 [92m☑[0m 784
Q 190-81  T 109 [92m☑[0m 109
Q 497-33  T 464 [92m☑[0m 464
Q 271-8   T 263 [92m☑[0m 263
Q 879-89  T 790 [92m☑[0m 790
Q 739-73  T 666 [92m☑[0m 666
Q 371-90  T 281 [92m☑[0m 281
Q 473-72  T 401 [92m☑[0m 401

--------------------------------------------------
Iteration 14
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 364-26  T 338 [92m☑[0m 338
Q 959-44  T 915 [92m☑[0m 915
Q 315-3   T 312 [92m☑[0m 312
Q 520-37  T 483 [92m☑[0m 483
Q 186-26  T 160 [92m☑[0m 160
Q 99-30   T 69  [92m☑[0m 69 
Q 131-83  T 48  [91m☒[0m 49 
Q 620-94  T 526 [92m☑[0m 526
Q 512-12  T 500 [91m☒[0m 400
Q 105-3   T 102 [92m☑[0m 102

--------------------------------------------------
Iteration 15
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 913-9   T 904 [92m☑[0m 904
Q 674-11  T 663 [92m☑[0m 663
Q 264-36  T 228 [92m☑[0m 228
Q 715-5   T 710 [92m☑[0m 710
Q 273-2 

Q 270-50  T 220 [91m☒[0m 210
Q 446-97  T 349 [92m☑[0m 349
Q 672-12  T 660 [92m☑[0m 660
Q 80-30   T 50  [92m☑[0m 50 
Q 947-3   T 944 [92m☑[0m 944
Q 247-5   T 242 [92m☑[0m 242
Q 148-13  T 135 [92m☑[0m 135
Q 2-1     T 1   [91m☒[0m 0  
Q 957-323 T 634 [92m☑[0m 634
Q 895-0   T 895 [92m☑[0m 895

--------------------------------------------------
Iteration 29
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 610-4   T 606 [92m☑[0m 606
Q 632-37  T 595 [92m☑[0m 595
Q 394-77  T 317 [92m☑[0m 317
Q 972-68  T 904 [92m☑[0m 904
Q 181-4   T 177 [92m☑[0m 177
Q 590-66  T 524 [92m☑[0m 524
Q 742-514 T 228 [92m☑[0m 228
Q 714-9   T 705 [92m☑[0m 705
Q 117-6   T 111 [92m☑[0m 111
Q 926-7   T 919 [92m☑[0m 919

--------------------------------------------------
Iteration 30
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 82-6    T 76  [92m☑[0m 76 
Q 763-541 T 222 [92m☑[0m 222
Q 823-29  T 794 [92m☑[0m 794
Q 931-183 T 748 [92m☑[0m 748
Q 75-13 

Q 343-32  T 311 [92m☑[0m 311
Q 258-170 T 88  [92m☑[0m 88 
Q 938-71  T 867 [92m☑[0m 867
Q 71-29   T 42  [92m☑[0m 42 
Q 689-47  T 642 [92m☑[0m 642
Q 939-20  T 919 [92m☑[0m 919
Q 662-47  T 615 [92m☑[0m 615
Q 390-0   T 390 [92m☑[0m 390
Q 787-209 T 578 [92m☑[0m 578
Q 36-0    T 36  [92m☑[0m 36 

--------------------------------------------------
Iteration 44
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 261-3   T 258 [92m☑[0m 258
Q 910-251 T 659 [92m☑[0m 659
Q 105-2   T 103 [92m☑[0m 103
Q 48-39   T 9   [92m☑[0m 9  
Q 257-6   T 251 [92m☑[0m 251
Q 682-350 T 332 [92m☑[0m 332
Q 682-92  T 590 [92m☑[0m 590
Q 791-8   T 783 [92m☑[0m 783
Q 605-417 T 188 [92m☑[0m 188
Q 799-3   T 796 [92m☑[0m 796

--------------------------------------------------
Iteration 45
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 927-389 T 538 [92m☑[0m 538
Q 149-2   T 147 [92m☑[0m 147
Q 514-70  T 444 [92m☑[0m 444
Q 74-15   T 59  [92m☑[0m 59 
Q 98-62 

Q 813-60  T 753 [92m☑[0m 753
Q 954-16  T 938 [92m☑[0m 938
Q 578-322 T 256 [92m☑[0m 256
Q 92-75   T 17  [92m☑[0m 17 
Q 809-5   T 804 [92m☑[0m 804
Q 84-81   T 3   [92m☑[0m 3  
Q 388-373 T 15  [92m☑[0m 15 
Q 386-81  T 305 [92m☑[0m 305
Q 292-30  T 262 [92m☑[0m 262
Q 959-86  T 873 [92m☑[0m 873

--------------------------------------------------
Iteration 59
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 25-1    T 24  [92m☑[0m 24 
Q 407-0   T 407 [92m☑[0m 407
Q 615-573 T 42  [92m☑[0m 42 
Q 455-9   T 446 [92m☑[0m 446
Q 458-71  T 387 [92m☑[0m 387
Q 93-85   T 8   [92m☑[0m 8  
Q 546-3   T 543 [92m☑[0m 543
Q 768-243 T 525 [92m☑[0m 525
Q 520-7   T 513 [92m☑[0m 513
Q 690-17  T 673 [92m☑[0m 673

--------------------------------------------------
Iteration 60
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 343-193 T 150 [92m☑[0m 150
Q 98-18   T 80  [92m☑[0m 80 
Q 59-9    T 50  [92m☑[0m 50 
Q 58-12   T 46  [92m☑[0m 46 
Q 974-1 

Q 601-26  T 575 [92m☑[0m 575
Q 276-8   T 268 [92m☑[0m 268
Q 819-194 T 625 [92m☑[0m 625
Q 727-273 T 454 [92m☑[0m 454
Q 362-1   T 361 [92m☑[0m 361
Q 620-0   T 620 [92m☑[0m 620
Q 560-29  T 531 [92m☑[0m 531
Q 591-40  T 551 [92m☑[0m 551
Q 739-73  T 666 [92m☑[0m 666
Q 592-86  T 506 [92m☑[0m 506

--------------------------------------------------
Iteration 74
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 470-4   T 466 [92m☑[0m 466
Q 425-36  T 389 [92m☑[0m 389
Q 83-38   T 45  [92m☑[0m 45 
Q 71-51   T 20  [92m☑[0m 20 
Q 829-0   T 829 [92m☑[0m 829
Q 767-54  T 713 [92m☑[0m 713
Q 166-29  T 137 [92m☑[0m 137
Q 832-73  T 759 [92m☑[0m 759
Q 996-77  T 919 [92m☑[0m 919
Q 957-26  T 931 [92m☑[0m 931

--------------------------------------------------
Iteration 75
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 895-681 T 214 [92m☑[0m 214
Q 198-55  T 143 [92m☑[0m 143
Q 769-325 T 444 [92m☑[0m 444
Q 278-3   T 275 [92m☑[0m 275
Q 783-77

Q 461-78  T 383 [92m☑[0m 383
Q 134-58  T 76  [92m☑[0m 76 
Q 684-6   T 678 [92m☑[0m 678
Q 946-3   T 943 [92m☑[0m 943
Q 107-58  T 49  [92m☑[0m 49 
Q 303-2   T 301 [92m☑[0m 301
Q 904-0   T 904 [92m☑[0m 904
Q 234-73  T 161 [92m☑[0m 161
Q 749-185 T 564 [92m☑[0m 564
Q 210-63  T 147 [92m☑[0m 147

--------------------------------------------------
Iteration 89
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 129-66  T 63  [92m☑[0m 63 
Q 995-36  T 959 [92m☑[0m 959
Q 627-24  T 603 [92m☑[0m 603
Q 685-3   T 682 [92m☑[0m 682
Q 71-67   T 4   [92m☑[0m 4  
Q 690-17  T 673 [92m☑[0m 673
Q 815-6   T 809 [92m☑[0m 809
Q 242-52  T 190 [92m☑[0m 190
Q 96-88   T 8   [92m☑[0m 8  
Q 95-37   T 58  [92m☑[0m 58 

--------------------------------------------------
Iteration 90
Train on 20250 samples, validate on 2250 samples
Epoch 1/1
Q 96-82   T 14  [92m☑[0m 14 
Q 43-14   T 29  [92m☑[0m 29 
Q 280-31  T 249 [92m☑[0m 249
Q 306-0   T 306 [92m☑[0m 306
Q 986-28

### Analysis
* Only subtraction
* 3 digits
* Training size: 20.25K
* Total training epochs: 100
* First reached 99% validation accuracy at epoch 27

In [14]:
# Experiment: subtraction only, operands of up to 4 digits.
DIGITS = 4
MAXLEN = DIGITS + 1 + DIGITS  # two operands plus one operator character
chars = '0123456789- '
ansmaxlen = DIGITS
DATA_SIZE = 60000  # module-level: DataGenerate keeps generating until it has this many questions

ctable = CharacterTable(chars)
questions, expected, shows = DataGenerate('-', maxlen=MAXLEN, digits=DIGITS, ansmaxlen=ansmaxlen)
x, y = Vectorization(questions, expected, ctable, maxlen=MAXLEN, ansmaxlen=ansmaxlen)
x_train, y_train, x_val, y_val, test_x, test_y = shuffle(x,y)

model = BuildModel(maxlen=MAXLEN, ansmaxlen=ansmaxlen)
for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    # Train a single epoch per iteration so predictions can be spot-checked in between.
    model.fit(x_train, y_train, verbose=1,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    # Print 10 randomly drawn validation samples next to the model's answer.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Indexing with a one-element array keeps the leading batch axis.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # `Sequential.predict_classes` is deprecated and removed in newer Keras/TF;
        # taking the argmax of `predict` over the last axis is the documented equivalent.
        preds = model.predict(rowx, verbose=0).argmax(axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        # Green check mark on an exact string match, red cross otherwise.
        if correct == guess:
            mark = colors.ok + '☑' + colors.close
        else:
            mark = colors.fail + '☒' + colors.close
        # NOTE(review): presumably the encoded question is stored reversed when
        # REVERSE is set, hence the flip for display; confirm against Vectorization.
        print('Q', q[::-1] if REVERSE else q, 'T', correct, mark, guess)

Generating data...
Total addition questions: 60000
Vectorization...
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_7 (LSTM)                (None, 512)               1075200   
_________________________________________________________________
reshape_4 (Reshape)          (None, 4, 128)            0         
_________________________________________________________________
lstm_8 (LSTM)                (None, 4, 512)            1312768   
_________________________________________________________________
dense_4 (Dense)              (None, 4, 12)             6156      
Total params: 2,394,124
Trainable params: 2,394,124
Non-trainable params: 0
_________________________________________________________________

--------------------------------------------------
Iteration 0
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 818-42    T 776  [91m☒[0m 806 
Q 2280-47   T 2233 [91

Q 7958-81   T 7877 [92m☑[0m 7877
Q 4191-72   T 4119 [92m☑[0m 4119
Q 7592-59   T 7533 [92m☑[0m 7533
Q 3602-2    T 3600 [92m☑[0m 3600
Q 916-11    T 905  [92m☑[0m 905 
Q 437-292   T 145  [92m☑[0m 145 
Q 2443-41   T 2402 [92m☑[0m 2402
Q 466-40    T 426  [92m☑[0m 426 
Q 7774-5419 T 2355 [91m☒[0m 2256
Q 939-8     T 931  [92m☑[0m 931 

--------------------------------------------------
Iteration 13
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 802-80    T 722  [92m☑[0m 722 
Q 876-8     T 868  [92m☑[0m 868 
Q 133-114   T 19   [92m☑[0m 19  
Q 5527-111  T 5416 [91m☒[0m 5516
Q 7906-8    T 7898 [92m☑[0m 7898
Q 3243-39   T 3204 [92m☑[0m 3204
Q 753-700   T 53   [91m☒[0m 43  
Q 90-46     T 44   [92m☑[0m 44  
Q 3034-4    T 3030 [92m☑[0m 3030
Q 7251-0    T 7251 [92m☑[0m 7251

--------------------------------------------------
Iteration 14
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 944-81    T 863  [92m☑[0m 863 
Q 3309-48   T 326

Q 931-95    T 836  [92m☑[0m 836 
Q 1658-101  T 1557 [92m☑[0m 1557
Q 2513-221  T 2292 [92m☑[0m 2292
Q 9869-22   T 9847 [92m☑[0m 9847
Q 4720-343  T 4377 [92m☑[0m 4377
Q 320-6     T 314  [92m☑[0m 314 
Q 8998-299  T 8699 [92m☑[0m 8699
Q 7945-2770 T 5175 [92m☑[0m 5175
Q 734-7     T 727  [92m☑[0m 727 
Q 421-23    T 398  [92m☑[0m 398 

--------------------------------------------------
Iteration 27
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 278-73    T 205  [92m☑[0m 205 
Q 8057-8    T 8049 [92m☑[0m 8049
Q 2049-0    T 2049 [92m☑[0m 2049
Q 755-184   T 571  [92m☑[0m 571 
Q 6348-6    T 6342 [92m☑[0m 6342
Q 624-206   T 418  [92m☑[0m 418 
Q 8131-0    T 8131 [92m☑[0m 8131
Q 8787-6705 T 2082 [92m☑[0m 2082
Q 8740-60   T 8680 [92m☑[0m 8680
Q 857-138   T 719  [92m☑[0m 719 

--------------------------------------------------
Iteration 28
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 4414-716  T 3698 [92m☑[0m 3698
Q 8764-74   T 869

Q 7904-56   T 7848 [92m☑[0m 7848
Q 9090-98   T 8992 [91m☒[0m 8002
Q 6334-241  T 6093 [92m☑[0m 6093
Q 84-60     T 24   [92m☑[0m 24  
Q 72-22     T 50   [92m☑[0m 50  
Q 7209-1016 T 6193 [92m☑[0m 6193
Q 2283-11   T 2272 [92m☑[0m 2272
Q 605-37    T 568  [92m☑[0m 568 
Q 6462-61   T 6401 [92m☑[0m 6401
Q 803-447   T 356  [92m☑[0m 356 

--------------------------------------------------
Iteration 41
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 7396-96   T 7300 [92m☑[0m 7300
Q 916-6     T 910  [92m☑[0m 910 
Q 460-64    T 396  [92m☑[0m 396 
Q 9866-9    T 9857 [92m☑[0m 9857
Q 774-199   T 575  [92m☑[0m 575 
Q 4304-21   T 4283 [92m☑[0m 4283
Q 959-8     T 951  [92m☑[0m 951 
Q 5017-2    T 5015 [92m☑[0m 5015
Q 7779-13   T 7766 [92m☑[0m 7766
Q 84-50     T 34   [92m☑[0m 34  

--------------------------------------------------
Iteration 42
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 3500-18   T 3482 [92m☑[0m 3482
Q 548-330   T 218

Q 223-135   T 88   [92m☑[0m 88  
Q 103-5     T 98   [92m☑[0m 98  
Q 45-45     T 0    [92m☑[0m 0   
Q 97-63     T 34   [92m☑[0m 34  
Q 7777-2323 T 5454 [92m☑[0m 5454
Q 8975-403  T 8572 [92m☑[0m 8572
Q 3686-451  T 3235 [92m☑[0m 3235
Q 541-428   T 113  [92m☑[0m 113 
Q 476-12    T 464  [92m☑[0m 464 
Q 42-23     T 19   [92m☑[0m 19  

--------------------------------------------------
Iteration 55
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 7448-135  T 7313 [92m☑[0m 7313
Q 176-2     T 174  [92m☑[0m 174 
Q 934-9     T 925  [92m☑[0m 925 
Q 5469-91   T 5378 [92m☑[0m 5378
Q 5500-8    T 5492 [92m☑[0m 5492
Q 567-1     T 566  [92m☑[0m 566 
Q 2296-2    T 2294 [92m☑[0m 2294
Q 7930-81   T 7849 [92m☑[0m 7849
Q 591-20    T 571  [92m☑[0m 571 
Q 2156-6    T 2150 [92m☑[0m 2150

--------------------------------------------------
Iteration 56
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 444-9     T 435  [92m☑[0m 435 
Q 6515-3576 T 293

Q 584-425   T 159  [92m☑[0m 159 
Q 672-74    T 598  [92m☑[0m 598 
Q 257-34    T 223  [92m☑[0m 223 
Q 882-38    T 844  [92m☑[0m 844 
Q 1996-9    T 1987 [92m☑[0m 1987
Q 71-37     T 34   [92m☑[0m 34  
Q 6082-0    T 6082 [92m☑[0m 6082
Q 574-47    T 527  [92m☑[0m 527 
Q 961-437   T 524  [92m☑[0m 524 
Q 7606-541  T 7065 [92m☑[0m 7065

--------------------------------------------------
Iteration 69
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 768-668   T 100  [92m☑[0m 100 
Q 7240-389  T 6851 [92m☑[0m 6851
Q 564-34    T 530  [92m☑[0m 530 
Q 6407-776  T 5631 [92m☑[0m 5631
Q 248-11    T 237  [92m☑[0m 237 
Q 9433-7555 T 1878 [92m☑[0m 1878
Q 8706-955  T 7751 [92m☑[0m 7751
Q 61-3      T 58   [92m☑[0m 58  
Q 7441-714  T 6727 [92m☑[0m 6727
Q 4344-5    T 4339 [92m☑[0m 4339

--------------------------------------------------
Iteration 70
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 9007-9    T 8998 [92m☑[0m 8998
Q 2050-5    T 204

Q 8658-818  T 7840 [92m☑[0m 7840
Q 83-47     T 36   [92m☑[0m 36  
Q 405-28    T 377  [92m☑[0m 377 
Q 7721-0    T 7721 [92m☑[0m 7721
Q 6711-260  T 6451 [92m☑[0m 6451
Q 879-665   T 214  [92m☑[0m 214 
Q 814-80    T 734  [92m☑[0m 734 
Q 260-3     T 257  [92m☑[0m 257 
Q 1291-1    T 1290 [92m☑[0m 1290
Q 580-53    T 527  [92m☑[0m 527 

--------------------------------------------------
Iteration 83
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 1091-682  T 409  [92m☑[0m 409 
Q 6080-1347 T 4733 [92m☑[0m 4733
Q 4146-0    T 4146 [92m☑[0m 4146
Q 6544-4314 T 2230 [92m☑[0m 2230
Q 901-1     T 900  [92m☑[0m 900 
Q 741-59    T 682  [92m☑[0m 682 
Q 1382-47   T 1335 [92m☑[0m 1335
Q 84-52     T 32   [92m☑[0m 32  
Q 9197-7605 T 1592 [91m☒[0m 1492
Q 287-56    T 231  [92m☑[0m 231 

--------------------------------------------------
Iteration 84
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 3725-411  T 3314 [92m☑[0m 3314
Q 363-270   T 93 

Q 1587-1    T 1586 [92m☑[0m 1586
Q 1155-633  T 522  [92m☑[0m 522 
Q 9003-89   T 8914 [92m☑[0m 8914
Q 9942-2    T 9940 [92m☑[0m 9940
Q 7366-6    T 7360 [92m☑[0m 7360
Q 3659-0    T 3659 [92m☑[0m 3659
Q 3500-18   T 3482 [92m☑[0m 3482
Q 2777-971  T 1806 [92m☑[0m 1806
Q 3491-90   T 3401 [92m☑[0m 3401
Q 532-8     T 524  [92m☑[0m 524 

--------------------------------------------------
Iteration 97
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 1956-2    T 1954 [92m☑[0m 1954
Q 731-1     T 730  [92m☑[0m 730 
Q 146-63    T 83   [92m☑[0m 83  
Q 8508-8    T 8500 [92m☑[0m 8500
Q 582-198   T 384  [92m☑[0m 384 
Q 696-71    T 625  [92m☑[0m 625 
Q 213-68    T 145  [92m☑[0m 145 
Q 24-10     T 14   [92m☑[0m 14  
Q 7396-96   T 7300 [92m☑[0m 7300
Q 5440-456  T 4984 [92m☑[0m 4984

--------------------------------------------------
Iteration 98
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 6655-183  T 6472 [92m☑[0m 6472
Q 1839-277  T 156

### Analysis
* Only subtraction
* 4 digits
* Training size: 40.5K
* Total training epochs: 100
* First reached 99% validation accuracy at epoch 25

In [15]:
# Experiment: addition and subtraction, operands of up to 3 digits.
DIGITS = 3
MAXLEN = DIGITS + 1 + DIGITS  # two operands plus one operator character
chars = '0123456789+- '
ansmaxlen = DIGITS + 1
DATA_SIZE = 45000  # module-level: DataGenerate keeps generating until it has this many questions

ctable = CharacterTable(chars)
questions, expected, shows = DataGenerate('+-', maxlen=MAXLEN, digits=DIGITS, ansmaxlen=ansmaxlen)
x, y = Vectorization(questions, expected, ctable, maxlen=MAXLEN, ansmaxlen=ansmaxlen)
x_train, y_train, x_val, y_val, test_x, test_y = shuffle(x,y)

model = BuildModel(maxlen=MAXLEN, ansmaxlen=ansmaxlen)
for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    # Train a single epoch per iteration so predictions can be spot-checked in between.
    model.fit(x_train, y_train, verbose=1,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    # Print 10 randomly drawn validation samples next to the model's answer.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Indexing with a one-element array keeps the leading batch axis.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # `Sequential.predict_classes` is deprecated and removed in newer Keras/TF;
        # taking the argmax of `predict` over the last axis is the documented equivalent.
        preds = model.predict(rowx, verbose=0).argmax(axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        # Green check mark on an exact string match, red cross otherwise.
        if correct == guess:
            mark = colors.ok + '☑' + colors.close
        else:
            mark = colors.fail + '☒' + colors.close
        # NOTE(review): presumably the encoded question is stored reversed when
        # REVERSE is set, hence the flip for display; confirm against Vectorization.
        print('Q', q[::-1] if REVERSE else q, 'T', correct, mark, guess)

Generating data...
Total addition questions: 45000
Vectorization...
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_9 (LSTM)                (None, 512)               1077248   
_________________________________________________________________
reshape_5 (Reshape)          (None, 4, 128)            0         
_________________________________________________________________
lstm_10 (LSTM)               (None, 4, 512)            1312768   
_________________________________________________________________
dense_5 (Dense)              (None, 4, 13)             6669      
Total params: 2,396,685
Trainable params: 2,396,685
Non-trainable params: 0
_________________________________________________________________

--------------------------------------------------
Iteration 0
Train on 30375 samples, validate on 3375 samples
Epoch 1/1
Q 615+5   T 620  [91m☒[0m 107 
Q 692-8   T 684  [91m☒[

Q 639-84  T 555  [92m☑[0m 555 
Q 528+71  T 599  [91m☒[0m 609 
Q 112-66  T 46   [91m☒[0m 55  
Q 63-1    T 62   [92m☑[0m 62  
Q 816+36  T 852  [92m☑[0m 852 
Q 695+184 T 879  [92m☑[0m 879 
Q 721+4   T 725  [92m☑[0m 725 
Q 470-8   T 462  [92m☑[0m 462 
Q 148+38  T 186  [92m☑[0m 186 
Q 508+20  T 528  [92m☑[0m 528 

--------------------------------------------------
Iteration 14
Train on 30375 samples, validate on 3375 samples
Epoch 1/1
Q 346-129 T 217  [92m☑[0m 217 
Q 79+26   T 105  [92m☑[0m 105 
Q 893+784 T 1677 [92m☑[0m 1677
Q 73-14   T 59   [91m☒[0m 69  
Q 835-73  T 762  [92m☑[0m 762 
Q 611-4   T 607  [92m☑[0m 607 
Q 764-0   T 764  [92m☑[0m 764 
Q 715-571 T 144  [92m☑[0m 144 
Q 949-176 T 773  [92m☑[0m 773 
Q 689+205 T 894  [92m☑[0m 894 

--------------------------------------------------
Iteration 15
Train on 30375 samples, validate on 3375 samples
Epoch 1/1
Q 363-221 T 142  [92m☑[0m 142 
Q 290-58  T 232  [92m☑[0m 232 
Q 289+0   T 289  [92m☑[0

Q 63-13   T 50   [92m☑[0m 50  
Q 721+91  T 812  [92m☑[0m 812 
Q 920-605 T 315  [92m☑[0m 315 
Q 471-72  T 399  [92m☑[0m 399 
Q 487+48  T 535  [92m☑[0m 535 
Q 203-7   T 196  [92m☑[0m 196 
Q 904+815 T 1719 [92m☑[0m 1719
Q 458-18  T 440  [92m☑[0m 440 
Q 63-13   T 50   [92m☑[0m 50  
Q 207+4   T 211  [92m☑[0m 211 

--------------------------------------------------
Iteration 28
Train on 30375 samples, validate on 3375 samples
Epoch 1/1
Q 40-33   T 7    [92m☑[0m 7   
Q 529-424 T 105  [92m☑[0m 105 
Q 92+36   T 128  [92m☑[0m 128 
Q 701+698 T 1399 [91m☒[0m 1499
Q 895+623 T 1518 [92m☑[0m 1518
Q 78-2    T 76   [92m☑[0m 76  
Q 639-84  T 555  [92m☑[0m 555 
Q 560-85  T 475  [92m☑[0m 475 
Q 183-24  T 159  [92m☑[0m 159 
Q 586-26  T 560  [92m☑[0m 560 

--------------------------------------------------
Iteration 29
Train on 30375 samples, validate on 3375 samples
Epoch 1/1
Q 462-0   T 462  [92m☑[0m 462 
Q 34-25   T 9    [92m☑[0m 9   
Q 557-1   T 556  [92m☑[0

Q 946+13  T 959  [92m☑[0m 959 
Q 95-50   T 45   [92m☑[0m 45  
Q 281-7   T 274  [92m☑[0m 274 
Q 831-1   T 830  [92m☑[0m 830 
Q 997-303 T 694  [91m☒[0m 594 
Q 874-15  T 859  [92m☑[0m 859 
Q 94-40   T 54   [92m☑[0m 54  
Q 816+3   T 819  [92m☑[0m 819 
Q 728+71  T 799  [92m☑[0m 799 
Q 763-3   T 760  [92m☑[0m 760 

--------------------------------------------------
Iteration 42
Train on 30375 samples, validate on 3375 samples
Epoch 1/1
Q 421-0   T 421  [92m☑[0m 421 
Q 313-55  T 258  [92m☑[0m 258 
Q 570+3   T 573  [92m☑[0m 573 
Q 347-6   T 341  [92m☑[0m 341 
Q 62-23   T 39   [92m☑[0m 39  
Q 28+2    T 30   [92m☑[0m 30  
Q 257-4   T 253  [92m☑[0m 253 
Q 305+9   T 314  [92m☑[0m 314 
Q 783+47  T 830  [92m☑[0m 830 
Q 690+22  T 712  [92m☑[0m 712 

--------------------------------------------------
Iteration 43
Train on 30375 samples, validate on 3375 samples
Epoch 1/1
Q 331-78  T 253  [92m☑[0m 253 
Q 898+25  T 923  [92m☑[0m 923 
Q 488+5   T 493  [92m☑[0

Q 322+7   T 329  [92m☑[0m 329 
Q 267-79  T 188  [92m☑[0m 188 
Q 767+461 T 1228 [92m☑[0m 1228
Q 363-220 T 143  [92m☑[0m 143 
Q 384+0   T 384  [92m☑[0m 384 
Q 561+109 T 670  [92m☑[0m 670 
Q 148+38  T 186  [92m☑[0m 186 
Q 778+92  T 870  [92m☑[0m 870 
Q 366-34  T 332  [92m☑[0m 332 
Q 389-56  T 333  [92m☑[0m 333 

--------------------------------------------------
Iteration 56
Train on 30375 samples, validate on 3375 samples
Epoch 1/1
Q 389-95  T 294  [92m☑[0m 294 
Q 82-45   T 37   [92m☑[0m 37  
Q 736-97  T 639  [92m☑[0m 639 
Q 833-437 T 396  [92m☑[0m 396 
Q 508+50  T 558  [92m☑[0m 558 
Q 284+251 T 535  [92m☑[0m 535 
Q 213+30  T 243  [92m☑[0m 243 
Q 474+28  T 502  [92m☑[0m 502 
Q 939-891 T 48   [91m☒[0m 57  
Q 485-7   T 478  [92m☑[0m 478 

--------------------------------------------------
Iteration 57
Train on 30375 samples, validate on 3375 samples
Epoch 1/1
Q 374+196 T 570  [92m☑[0m 570 
Q 112-43  T 69   [92m☑[0m 69  
Q 883+8   T 891  [92m☑[0

Q 376+11  T 387  [92m☑[0m 387 
Q 484+15  T 499  [92m☑[0m 499 
Q 97-81   T 16   [92m☑[0m 16  
Q 655-0   T 655  [92m☑[0m 655 
Q 377+6   T 383  [92m☑[0m 383 
Q 930+1   T 931  [92m☑[0m 931 
Q 440+63  T 503  [92m☑[0m 503 
Q 620+2   T 622  [92m☑[0m 622 
Q 474-88  T 386  [92m☑[0m 386 
Q 873-3   T 870  [92m☑[0m 870 

--------------------------------------------------
Iteration 70
Train on 30375 samples, validate on 3375 samples
Epoch 1/1
Q 90+1    T 91   [92m☑[0m 91  
Q 238+1   T 239  [92m☑[0m 239 
Q 988+675 T 1663 [92m☑[0m 1663
Q 964-782 T 182  [92m☑[0m 182 
Q 806-753 T 53   [92m☑[0m 53  
Q 486-57  T 429  [92m☑[0m 429 
Q 954+229 T 1183 [92m☑[0m 1183
Q 208+6   T 214  [92m☑[0m 214 
Q 540-70  T 470  [92m☑[0m 470 
Q 213+30  T 243  [92m☑[0m 243 

--------------------------------------------------
Iteration 71
Train on 30375 samples, validate on 3375 samples
Epoch 1/1
Q 500-5   T 495  [92m☑[0m 495 
Q 690-23  T 667  [92m☑[0m 667 
Q 100+5   T 105  [92m☑[0

Q 56+23   T 79   [92m☑[0m 79  
Q 880+7   T 887  [92m☑[0m 887 
Q 290-18  T 272  [92m☑[0m 272 
Q 850+92  T 942  [92m☑[0m 942 
Q 229+7   T 236  [92m☑[0m 236 
Q 995-2   T 993  [92m☑[0m 993 
Q 600-85  T 515  [92m☑[0m 515 
Q 528+2   T 530  [92m☑[0m 530 
Q 794+510 T 1304 [92m☑[0m 1304
Q 614+6   T 620  [92m☑[0m 620 

--------------------------------------------------
Iteration 84
Train on 30375 samples, validate on 3375 samples
Epoch 1/1
Q 173-37  T 136  [92m☑[0m 136 
Q 859-85  T 774  [92m☑[0m 774 
Q 951-855 T 96   [92m☑[0m 96  
Q 212-29  T 183  [92m☑[0m 183 
Q 25-22   T 3    [92m☑[0m 3   
Q 911-0   T 911  [92m☑[0m 911 
Q 804-9   T 795  [92m☑[0m 795 
Q 466-3   T 463  [92m☑[0m 463 
Q 991-7   T 984  [92m☑[0m 984 
Q 190-7   T 183  [92m☑[0m 183 

--------------------------------------------------
Iteration 85
Train on 30375 samples, validate on 3375 samples
Epoch 1/1
Q 694-68  T 626  [92m☑[0m 626 
Q 526-67  T 459  [92m☑[0m 459 
Q 881-42  T 839  [92m☑[0

Q 222-6   T 216  [92m☑[0m 216 
Q 76+20   T 96   [92m☑[0m 96  
Q 67-38   T 29   [92m☑[0m 29  
Q 876+3   T 879  [92m☑[0m 879 
Q 94-43   T 51   [92m☑[0m 51  
Q 816-338 T 478  [92m☑[0m 478 
Q 107+8   T 115  [92m☑[0m 115 
Q 156+22  T 178  [92m☑[0m 178 
Q 600-0   T 600  [92m☑[0m 600 
Q 246+8   T 254  [92m☑[0m 254 

--------------------------------------------------
Iteration 98
Train on 30375 samples, validate on 3375 samples
Epoch 1/1
Q 181-82  T 99   [92m☑[0m 99  
Q 883+114 T 997  [92m☑[0m 997 
Q 628-5   T 623  [92m☑[0m 623 
Q 564-88  T 476  [92m☑[0m 476 
Q 301+180 T 481  [92m☑[0m 481 
Q 758+57  T 815  [92m☑[0m 815 
Q 131-81  T 50   [92m☑[0m 50  
Q 19+7    T 26   [92m☑[0m 26  
Q 669+24  T 693  [92m☑[0m 693 
Q 43+2    T 45   [92m☑[0m 45  

--------------------------------------------------
Iteration 99
Train on 30375 samples, validate on 3375 samples
Epoch 1/1
Q 409-9   T 400  [92m☑[0m 400 
Q 96+71   T 167  [92m☑[0m 167 
Q 92+18   T 110  [92m☑[0

### Analysis
* Addition & subtraction
* 3 digits
* Training size: 30.375K
* Total training epochs: 100
* First reached 99% validation accuracy at epoch 25

In [16]:
# Experiment: addition and subtraction, operands of up to 4 digits.
DIGITS = 4
MAXLEN = DIGITS + 1 + DIGITS  # two operands plus one operator character
ansmaxlen = DIGITS + 1
chars = '0123456789+- '
DATA_SIZE = 60000  # module-level: DataGenerate keeps generating until it has this many questions

ctable = CharacterTable(chars)
questions, expected, shows = DataGenerate('+-', maxlen=MAXLEN, digits=DIGITS, ansmaxlen=ansmaxlen)
x, y = Vectorization(questions, expected, ctable, maxlen=MAXLEN, ansmaxlen=ansmaxlen)
x_train, y_train, x_val, y_val, test_x, test_y = shuffle(x,y)

model = BuildModel(maxlen=MAXLEN, ansmaxlen=ansmaxlen)
for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    # Train a single epoch per iteration so predictions can be spot-checked in between.
    model.fit(x_train, y_train, verbose=1,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    # Print 10 randomly drawn validation samples next to the model's answer.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Indexing with a one-element array keeps the leading batch axis.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # `Sequential.predict_classes` is deprecated and removed in newer Keras/TF;
        # taking the argmax of `predict` over the last axis is the documented equivalent.
        preds = model.predict(rowx, verbose=0).argmax(axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        # Green check mark on an exact string match, red cross otherwise.
        if correct == guess:
            mark = colors.ok + '☑' + colors.close
        else:
            mark = colors.fail + '☒' + colors.close
        # NOTE(review): presumably the encoded question is stored reversed when
        # REVERSE is set, hence the flip for display; confirm against Vectorization.
        print('Q', q[::-1] if REVERSE else q, 'T', correct, mark, guess)

Generating data...
Total addition questions: 60000
Vectorization...
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_11 (LSTM)               (None, 640)               1674240   
_________________________________________________________________
reshape_6 (Reshape)          (None, 5, 128)            0         
_________________________________________________________________
lstm_12 (LSTM)               (None, 5, 640)            1968640   
_________________________________________________________________
dense_6 (Dense)              (None, 5, 13)             8333      
Total params: 3,651,213
Trainable params: 3,651,213
Non-trainable params: 0
_________________________________________________________________

--------------------------------------------------
Iteration 0
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 4841+7    T 4848  [91m☒[0m 6626 
Q 6570-915  T 5655  

Q 511-10    T 501   [92m☑[0m 501  
Q 6794+265  T 7059  [91m☒[0m 6059 
Q 62-1      T 61    [92m☑[0m 61   
Q 881+636   T 1517  [92m☑[0m 1517 
Q 4562-90   T 4472  [92m☑[0m 4472 
Q 8376+135  T 8511  [92m☑[0m 8511 
Q 9692-24   T 9668  [92m☑[0m 9668 
Q 850-57    T 793   [92m☑[0m 793  
Q 938+22    T 960   [92m☑[0m 960  
Q 3135+3    T 3138  [92m☑[0m 3138 

--------------------------------------------------
Iteration 13
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 3664+8    T 3672  [92m☑[0m 3672 
Q 7160+720  T 7880  [92m☑[0m 7880 
Q 724-4     T 720   [92m☑[0m 720  
Q 7119+8    T 7127  [92m☑[0m 7127 
Q 3279+6    T 3285  [92m☑[0m 3285 
Q 917-830   T 87    [91m☒[0m 77   
Q 564+461   T 1025  [92m☑[0m 1025 
Q 6766+0    T 6766  [92m☑[0m 6766 
Q 9555-663  T 8892  [92m☑[0m 8892 
Q 64+20     T 84    [92m☑[0m 84   

--------------------------------------------------
Iteration 14
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 8550-2    

Q 3903-937  T 2966  [92m☑[0m 2966 
Q 760-712   T 48    [91m☒[0m 68   
Q 8033-83   T 7950  [92m☑[0m 7950 
Q 5378-3967 T 1411  [91m☒[0m 311  
Q 750-1     T 749   [92m☑[0m 749  
Q 8838-308  T 8530  [92m☑[0m 8530 
Q 892+437   T 1329  [92m☑[0m 1329 
Q 7873-49   T 7824  [91m☒[0m 7834 
Q 105+1     T 106   [92m☑[0m 106  
Q 3888+16   T 3904  [92m☑[0m 3904 

--------------------------------------------------
Iteration 26
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 8646+79   T 8725  [92m☑[0m 8725 
Q 9725+77   T 9802  [92m☑[0m 9802 
Q 114-48    T 66    [92m☑[0m 66   
Q 2024+475  T 2499  [92m☑[0m 2499 
Q 2482+13   T 2495  [92m☑[0m 2495 
Q 508-435   T 73    [92m☑[0m 73   
Q 5273+1    T 5274  [92m☑[0m 5274 
Q 2145+8    T 2153  [92m☑[0m 2153 
Q 456-21    T 435   [92m☑[0m 435  
Q 3862+806  T 4668  [92m☑[0m 4668 

--------------------------------------------------
Iteration 27
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 97+92     

Q 132-83    T 49    [92m☑[0m 49   
Q 859+62    T 921   [92m☑[0m 921  
Q 7007+8    T 7015  [92m☑[0m 7015 
Q 8418+5    T 8423  [92m☑[0m 8423 
Q 417-1     T 416   [92m☑[0m 416  
Q 1404-0    T 1404  [92m☑[0m 1404 
Q 1513+659  T 2172  [92m☑[0m 2172 
Q 765+392   T 1157  [91m☒[0m 1057 
Q 912-77    T 835   [92m☑[0m 835  
Q 3997-341  T 3656  [92m☑[0m 3656 

--------------------------------------------------
Iteration 40
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 4473-5    T 4468  [92m☑[0m 4468 
Q 7788+2491 T 10279 [91m☒[0m 10289
Q 5911+436  T 6347  [92m☑[0m 6347 
Q 3412-0    T 3412  [92m☑[0m 3412 
Q 481-30    T 451   [92m☑[0m 451  
Q 279-56    T 223   [92m☑[0m 223  
Q 711+0     T 711   [92m☑[0m 711  
Q 543-57    T 486   [92m☑[0m 486  
Q 9505-42   T 9463  [92m☑[0m 9463 
Q 8946+20   T 8966  [92m☑[0m 8966 

--------------------------------------------------
Iteration 41
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 25-2      

Q 95-43     T 52    [92m☑[0m 52   
Q 982-0     T 982   [92m☑[0m 982  
Q 5606-40   T 5566  [92m☑[0m 5566 
Q 174-39    T 135   [92m☑[0m 135  
Q 44+11     T 55    [92m☑[0m 55   
Q 911-777   T 134   [92m☑[0m 134  
Q 58-7      T 51    [92m☑[0m 51   
Q 212-43    T 169   [92m☑[0m 169  
Q 3898+909  T 4807  [91m☒[0m 4817 
Q 331-4     T 327   [92m☑[0m 327  

--------------------------------------------------
Iteration 53
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 812+74    T 886   [92m☑[0m 886  
Q 1789+5    T 1794  [92m☑[0m 1794 
Q 6248-373  T 5875  [92m☑[0m 5875 
Q 647-6     T 641   [92m☑[0m 641  
Q 50-7      T 43    [92m☑[0m 43   
Q 7304-719  T 6585  [92m☑[0m 6585 
Q 928-186   T 742   [92m☑[0m 742  
Q 5534-5170 T 364   [91m☒[0m 336  
Q 93+51     T 144   [92m☑[0m 144  
Q 869-47    T 822   [92m☑[0m 822  

--------------------------------------------------
Iteration 54
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 4123+748  

Q 1755+7    T 1762  [92m☑[0m 1762 
Q 74-8      T 66    [92m☑[0m 66   
Q 28-26     T 2     [91m☒[0m 1    
Q 5001-0    T 5001  [92m☑[0m 5001 
Q 967-60    T 907   [92m☑[0m 907  
Q 445+434   T 879   [92m☑[0m 879  
Q 723+42    T 765   [92m☑[0m 765  
Q 184-4     T 180   [92m☑[0m 180  
Q 528-39    T 489   [92m☑[0m 489  
Q 1718+76   T 1794  [92m☑[0m 1794 

--------------------------------------------------
Iteration 66
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 967-60    T 907   [92m☑[0m 907  
Q 840+110   T 950   [92m☑[0m 950  
Q 7160+41   T 7201  [92m☑[0m 7201 
Q 72-41     T 31    [92m☑[0m 31   
Q 162-21    T 141   [92m☑[0m 141  
Q 1133+4    T 1137  [92m☑[0m 1137 
Q 313+7     T 320   [92m☑[0m 320  
Q 146-62    T 84    [92m☑[0m 84   
Q 3589-3341 T 248   [91m☒[0m 237  
Q 931-29    T 902   [92m☑[0m 902  

--------------------------------------------------
Iteration 67
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 6810+883  

Q 202+63    T 265   [92m☑[0m 265  
Q 8159-225  T 7934  [92m☑[0m 7934 
Q 639-598   T 41    [92m☑[0m 41   
Q 40-27     T 13    [92m☑[0m 13   
Q 4552+331  T 4883  [92m☑[0m 4883 
Q 459-83    T 376   [92m☑[0m 376  
Q 8230-36   T 8194  [92m☑[0m 8194 
Q 4018-17   T 4001  [92m☑[0m 4001 
Q 80+37     T 117   [92m☑[0m 117  
Q 748-7     T 741   [92m☑[0m 741  

--------------------------------------------------
Iteration 80
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 535+2     T 537   [92m☑[0m 537  
Q 5736+95   T 5831  [92m☑[0m 5831 
Q 107+3     T 110   [92m☑[0m 110  
Q 629+72    T 701   [92m☑[0m 701  
Q 244-16    T 228   [92m☑[0m 228  
Q 264+69    T 333   [92m☑[0m 333  
Q 424+16    T 440   [92m☑[0m 440  
Q 5082+67   T 5149  [92m☑[0m 5149 
Q 40-33     T 7     [92m☑[0m 7    
Q 746-7     T 739   [92m☑[0m 739  

--------------------------------------------------
Iteration 81
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 731-8     

Q 9656-45   T 9611  [92m☑[0m 9611 
Q 5229-247  T 4982  [92m☑[0m 4982 
Q 82-3      T 79    [92m☑[0m 79   
Q 203-1     T 202   [92m☑[0m 202  
Q 628+75    T 703   [92m☑[0m 703  
Q 893+13    T 906   [92m☑[0m 906  
Q 8817+7082 T 15899 [91m☒[0m 15909
Q 149-42    T 107   [92m☑[0m 107  
Q 7165-17   T 7148  [92m☑[0m 7148 
Q 4689-726  T 3963  [92m☑[0m 3963 

--------------------------------------------------
Iteration 93
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 565+414   T 979   [92m☑[0m 979  
Q 259-257   T 2     [91m☒[0m 1    
Q 577-9     T 568   [92m☑[0m 568  
Q 7968-7    T 7961  [92m☑[0m 7961 
Q 9851+3    T 9854  [92m☑[0m 9854 
Q 695-1     T 694   [92m☑[0m 694  
Q 665+66    T 731   [92m☑[0m 731  
Q 5953+3116 T 9069  [91m☒[0m 8069 
Q 6333-742  T 5591  [92m☑[0m 5591 
Q 8607-834  T 7773  [92m☑[0m 7773 

--------------------------------------------------
Iteration 94
Train on 40500 samples, validate on 4500 samples
Epoch 1/1
Q 733-35    

### Analysis
* Addition & subtraction
* 4 digits
* Training size: 40.5K
* Total training epochs: 100
* First reached 99% validation accuracy at epoch 49

# Multiplication

In [17]:
# Experiment: multiplication only, operands of up to 3 digits.
DIGITS = 3
MAXLEN = DIGITS + 1 + DIGITS  # two operands plus one operator character
ansmaxlen = DIGITS + DIGITS  # answer width reserved for a product of two DIGITS-digit numbers
# NOTE(review): LAYERS is a notebook-level setting; BuildModel is not visible
# here, so confirm it actually reads this value.
LAYERS = 3
chars = '0123456789* '
DATA_SIZE = 80000  # module-level: DataGenerate keeps generating until it has this many questions

ctable = CharacterTable(chars)
questions, expected, shows = DataGenerate('*', maxlen=MAXLEN, digits=DIGITS, ansmaxlen=ansmaxlen)
x, y = Vectorization(questions, expected, ctable, maxlen=MAXLEN, ansmaxlen=ansmaxlen)
x_train, y_train, x_val, y_val, test_x, test_y = shuffle(x,y)

model = BuildModel(maxlen=MAXLEN, ansmaxlen=ansmaxlen)
for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    # Train a single epoch per iteration so predictions can be spot-checked in between.
    model.fit(x_train, y_train, verbose=1,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    # Print 10 randomly drawn validation samples next to the model's answer.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Indexing with a one-element array keeps the leading batch axis.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # `Sequential.predict_classes` is deprecated and removed in newer Keras/TF;
        # taking the argmax of `predict` over the last axis is the documented equivalent.
        preds = model.predict(rowx, verbose=0).argmax(axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        # Green check mark on an exact string match, red cross otherwise.
        if correct == guess:
            mark = colors.ok + '☑' + colors.close
        else:
            mark = colors.fail + '☒' + colors.close
        # NOTE(review): presumably the encoded question is stored reversed when
        # REVERSE is set, hence the flip for display; confirm against Vectorization.
        print('Q', q[::-1] if REVERSE else q, 'T', correct, mark, guess)

Generating data...
Total addition questions: 80000
Vectorization...
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_13 (LSTM)               (None, 768)               2399232   
_________________________________________________________________
reshape_7 (Reshape)          (None, 6, 128)            0         
_________________________________________________________________
lstm_14 (LSTM)               (None, 6, 768)            2755584   
_________________________________________________________________
dense_7 (Dense)              (None, 6, 12)             9228      
Total params: 5,164,044
Trainable params: 5,164,044
Non-trainable params: 0
_________________________________________________________________

--------------------------------------------------
Iteration 0
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 164*11  T 1804   [91m☒[0m 1088  
Q 178*32  T 5696   [

Q 698*673 T 469754 [91m☒[0m 442754
Q 81*72   T 5832   [91m☒[0m 5822  
Q 649*637 T 413413 [91m☒[0m 413693
Q 180*40  T 7200   [92m☑[0m 7200  
Q 613*247 T 151411 [91m☒[0m 159491
Q 187*1   T 187    [92m☑[0m 187   
Q 810*709 T 574290 [91m☒[0m 586490
Q 532*97  T 51604  [91m☒[0m 51124 
Q 132*26  T 3432   [91m☒[0m 3452  
Q 906*589 T 533634 [91m☒[0m 533494

--------------------------------------------------
Iteration 13
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 162*91  T 14742  [91m☒[0m 15742 
Q 992*199 T 197408 [91m☒[0m 198808
Q 456*268 T 122208 [91m☒[0m 124268
Q 992*767 T 760864 [91m☒[0m 769204
Q 861*33  T 28413  [91m☒[0m 28973 
Q 997*145 T 144565 [91m☒[0m 145965
Q 94*48   T 4512   [92m☑[0m 4512  
Q 359*83  T 29797  [91m☒[0m 30697 
Q 212*18  T 3816   [91m☒[0m 3356  
Q 248*86  T 21328  [91m☒[0m 21968 

--------------------------------------------------
Iteration 14
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 117*93  T 

Q 734*336 T 246624 [91m☒[0m 240084
Q 682*53  T 36146  [91m☒[0m 35626 
Q 301*28  T 8428   [92m☑[0m 8428  
Q 168*26  T 4368   [91m☒[0m 4288  
Q 125*82  T 10250  [91m☒[0m 10300 
Q 846*8   T 6768   [92m☑[0m 6768  
Q 894*465 T 415710 [91m☒[0m 417310
Q 764*187 T 142868 [91m☒[0m 145598
Q 545*21  T 11445  [91m☒[0m 11485 
Q 53*6    T 318    [92m☑[0m 318   

--------------------------------------------------
Iteration 26
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 952*36  T 34272  [91m☒[0m 34832 
Q 985*80  T 78800  [92m☑[0m 78800 
Q 873*783 T 683559 [91m☒[0m 683899
Q 162*73  T 11826  [91m☒[0m 11706 
Q 769*7   T 5383   [92m☑[0m 5383  
Q 829*130 T 107770 [91m☒[0m 106370
Q 209*88  T 18392  [92m☑[0m 18392 
Q 476*66  T 31416  [91m☒[0m 30016 
Q 501*6   T 3006   [92m☑[0m 3006  
Q 950*31  T 29450  [91m☒[0m 29850 

--------------------------------------------------
Iteration 27
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 284*75  T 

Q 592*90  T 53280  [92m☑[0m 53280 
Q 165*0   T 0      [92m☑[0m 0     
Q 55*39   T 2145   [91m☒[0m 2165  
Q 934*402 T 375468 [91m☒[0m 376128
Q 92*29   T 2668   [91m☒[0m 2688  
Q 921*26  T 23946  [92m☑[0m 23946 
Q 791*79  T 62489  [91m☒[0m 62589 
Q 575*399 T 229425 [91m☒[0m 231125
Q 508*311 T 157988 [91m☒[0m 157798
Q 63*12   T 756    [92m☑[0m 756   

--------------------------------------------------
Iteration 40
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 471*40  T 18840  [92m☑[0m 18840 
Q 972*54  T 52488  [91m☒[0m 52948 
Q 428*39  T 16692  [91m☒[0m 16392 
Q 461*57  T 26277  [92m☑[0m 26277 
Q 163*90  T 14670  [92m☑[0m 14670 
Q 261*68  T 17748  [91m☒[0m 17948 
Q 894*303 T 270882 [91m☒[0m 275382
Q 238*75  T 17850  [91m☒[0m 17650 
Q 733*325 T 238225 [91m☒[0m 231725
Q 846*357 T 302022 [91m☒[0m 310122

--------------------------------------------------
Iteration 41
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 195*24  T 

Q 609*487 T 296583 [91m☒[0m 299183
Q 873*758 T 661734 [91m☒[0m 659914
Q 974*776 T 755824 [91m☒[0m 757064
Q 225*141 T 31725  [91m☒[0m 31325 
Q 215*152 T 32680  [91m☒[0m 32880 
Q 200*39  T 7800   [92m☑[0m 7800  
Q 169*118 T 19942  [91m☒[0m 29862 
Q 693*390 T 270270 [91m☒[0m 270870
Q 146*68  T 9928   [91m☒[0m 10288 
Q 312*249 T 77688  [91m☒[0m 79288 

--------------------------------------------------
Iteration 53
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 218*47  T 10246  [91m☒[0m 1008  
Q 430*64  T 27520  [91m☒[0m 26920 
Q 790*71  T 56090  [92m☑[0m 56090 
Q 603*49  T 29547  [91m☒[0m 29347 
Q 605*214 T 129470 [91m☒[0m 129420
Q 419*11  T 4609   [91m☒[0m 4589  
Q 976*958 T 935008 [91m☒[0m 945508
Q 973*519 T 504987 [91m☒[0m 507127
Q 538*98  T 52724  [92m☑[0m 52724 
Q 896*657 T 588672 [91m☒[0m 580272

--------------------------------------------------
Iteration 54
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 375*58  T 

Q 819*89  T 72891  [91m☒[0m 73791 
Q 522*34  T 17748  [91m☒[0m 17348 
Q 695*93  T 64635  [91m☒[0m 64935 
Q 904*224 T 202496 [91m☒[0m 203396
Q 567*74  T 41958  [91m☒[0m 41358 
Q 591*149 T 88059  [91m☒[0m 86759 
Q 486*4   T 1944   [92m☑[0m 1944  
Q 352*6   T 2112   [92m☑[0m 2112  
Q 873*716 T 625068 [91m☒[0m 627468
Q 935*250 T 233750 [92m☑[0m 233750

--------------------------------------------------
Iteration 66
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 756*66  T 49896  [91m☒[0m 49696 
Q 771*480 T 370080 [91m☒[0m 372080
Q 945*59  T 55755  [91m☒[0m 55955 
Q 107*70  T 7490   [91m☒[0m 7590  
Q 591*529 T 312639 [91m☒[0m 303239
Q 656*93  T 61008  [91m☒[0m 60728 
Q 652*388 T 252976 [91m☒[0m 255276
Q 396*74  T 29304  [91m☒[0m 29784 
Q 890*199 T 177110 [91m☒[0m 177510
Q 780*0   T 0      [92m☑[0m 0     

--------------------------------------------------
Iteration 67
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 359*95  T 

Q 975*60  T 58500  [92m☑[0m 58500 
Q 689*200 T 137800 [92m☑[0m 137800
Q 413*21  T 8673   [92m☑[0m 8673  
Q 419*87  T 36453  [91m☒[0m 36413 
Q 812*598 T 485576 [91m☒[0m 486956
Q 951*539 T 512589 [91m☒[0m 515189
Q 386*3   T 1158   [92m☑[0m 1158  
Q 496*69  T 34224  [92m☑[0m 34224 
Q 197*26  T 5122   [91m☒[0m 5022  
Q 988*447 T 441636 [91m☒[0m 440316

--------------------------------------------------
Iteration 80
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 947*6   T 5682   [92m☑[0m 5682  
Q 328*134 T 43952  [91m☒[0m 44552 
Q 123*57  T 7011   [91m☒[0m 6991  
Q 347*15  T 5205   [92m☑[0m 5205  
Q 80*58   T 4640   [92m☑[0m 4640  
Q 320*44  T 14080  [91m☒[0m 14880 
Q 621*46  T 28566  [92m☑[0m 28566 
Q 90*55   T 4950   [92m☑[0m 4950  
Q 747*19  T 14193  [91m☒[0m 14413 
Q 495*4   T 1980   [92m☑[0m 1980  

--------------------------------------------------
Iteration 81
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 665*90  T 

Q 199*136 T 27064  [91m☒[0m 25664 
Q 727*40  T 29080  [92m☑[0m 29080 
Q 637*17  T 10829  [91m☒[0m 10869 
Q 241*6   T 1446   [92m☑[0m 1446  
Q 985*913 T 899305 [91m☒[0m 898245
Q 767*76  T 58292  [91m☒[0m 58552 
Q 640*591 T 378240 [91m☒[0m 381640
Q 705*48  T 33840  [91m☒[0m 33440 
Q 710*0   T 0      [92m☑[0m 0     
Q 543*36  T 19548  [91m☒[0m 19148 

--------------------------------------------------
Iteration 93
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 855*140 T 119700 [91m☒[0m 117700
Q 150*12  T 1800   [92m☑[0m 1800  
Q 918*425 T 390150 [91m☒[0m 393750
Q 846*8   T 6768   [92m☑[0m 6768  
Q 678*67  T 45426  [91m☒[0m 45726 
Q 431*337 T 145247 [91m☒[0m 149207
Q 363*48  T 17424  [91m☒[0m 17304 
Q 272*5   T 1360   [92m☑[0m 1360  
Q 704*493 T 347072 [91m☒[0m 349252
Q 511*75  T 38325  [91m☒[0m 38225 

--------------------------------------------------
Iteration 94
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 592*5   T 

### Analysis
* Multiplication
* 3 digits
* The maximum answer length must be increased: the product of two 3-digit numbers can have up to 6 digits
* Training size: 54K
* Total training epochs: 100
* First reached 75% validation accuracy at epoch 15
* Best accuracy: 78.30%

# Testing

In [18]:
# Spot-check the model on ten randomly drawn samples from test_x/test_y.
# Each output line shows: the question (Q), the true answer (T), a coloured
# pass/fail mark, and the model's decoded guess.
print("MSG : Prediction")
for _ in range(10):
    ind = np.random.randint(0, len(test_x))
    # Index with a one-element array so the selected rows keep a leading
    # batch axis of size 1, as model.predict expects a batch.
    rowx, rowy = test_x[np.array([ind])], test_y[np.array([ind])]
    # `Sequential.predict_classes` is deprecated and absent from newer Keras
    # releases. For a multi-class output it is equivalent to taking the argmax
    # of `predict` over the last (class) axis, which works on old and new
    # versions alike.
    preds = model.predict(rowx, verbose=0).argmax(axis=-1)
    q = ctable.decode(rowx[0])
    correct = ctable.decode(rowy[0])
    # preds already holds class indices, so skip the argmax inside decode().
    guess = ctable.decode(preds[0], calc_argmax=False)
    # When REVERSE is set, flip the decoded question string back before printing.
    print('Q', q[::-1] if REVERSE else q, end=' ')
    print('T', correct, end=' ')
    if correct == guess:
        mark = colors.ok + '☑' + colors.close
    else:
        mark = colors.fail + '☒' + colors.close
    print(mark, end=' ')
    print(guess)

MSG : Prediction
Q 896*74  T 66304  [92m☑[0m 66304 
Q 319*60  T 19140  [92m☑[0m 19140 
Q 285*77  T 21945  [91m☒[0m 21245 
Q 610*80  T 48800  [92m☑[0m 48800 
Q 591*232 T 137112 [91m☒[0m 138212
Q 983*757 T 744131 [91m☒[0m 734571
Q 146*27  T 3942   [91m☒[0m 3982  
Q 560*219 T 122640 [91m☒[0m 126840
Q 938*7   T 6566   [92m☑[0m 6566  
Q 667*76  T 50692  [91m☒[0m 50582 


# Report 
* Addition only, subtraction only, and mixed addition+subtraction can all reach 99% evaluation accuracy
* The more digits there are to compute, the more training data is needed to reach high accuracy
* Multiplication is more difficult than addition+subtraction: even with 54K training samples, validation accuracy only reaches almost 80%