In [1]:
from keras.models import Sequential
from keras import layers
import numpy as np
from six.moves import range

import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## OBJECTIVE :
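Learn to add two numbers with an LSTM-based recurrent neural network (RNN), treating addition as a character-level sequence-to-sequence problem.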

#### Input (Training)                    : numA + numB, plus the target sum (used only during training)
#### Input (Testing)                     : numA + numB
#### Output (Training/Testing)   : predictedSum 

Loss      : categorical_crossentropy

Optimizer : Adam

In [2]:
class CharacterTable(object):
    """Two-way mapping between characters and one-hot encoded rows.

    Given an alphabet, the table can:
    + encode a string into a one-hot matrix (one row per character),
    + decode a one-hot matrix, or a vector of indices, back into a string.
    """

    def __init__(self, chars):
        """Build the character <-> index lookup tables.

        # Arguments
            chars: Characters that can appear in the input. Duplicates are
                dropped and the rest is sorted, so a character's index is its
                position in the sorted alphabet.
        """
        alphabet = sorted(set(chars))
        self.chars = alphabet
        # Forward (char -> index) and reverse (index -> char) lookups.
        self.char_indices = {symbol: position
                             for position, symbol in enumerate(alphabet)}
        self.indices_char = dict(enumerate(alphabet))

    def encode(self, C, num_rows):
        """One-hot encode the string C.

        # Arguments
            C: String to encode; every character must be in the alphabet.
            num_rows: Number of rows in the returned one-hot encoding. This is
                used to keep the number of rows the same for every sample;
                rows beyond len(C) are left all-zero.

        # Returns
            Float array of shape (num_rows, alphabet size).
        """
        one_hot = np.zeros((num_rows, len(self.chars)))
        columns = np.array([self.char_indices[symbol] for symbol in C],
                           dtype=np.intp)
        rows = np.arange(len(columns))
        # Set one cell per character: row = position, column = char index.
        one_hot[rows, columns] = 1
        return one_hot

    def decode(self, x, calc_argmax=True):
        """Turn an encoded sequence back into a string.

        # Arguments
            x: Array of one-hot rows / per-class scores, or (when
                calc_argmax is False) a sequence of character indices.
            calc_argmax: Whether to reduce x to indices by taking the argmax
                over its last axis first.

        # Returns
            The decoded string, one character per index.
        """
        indices = x.argmax(axis=-1) if calc_argmax else x
        return ''.join([self.indices_char[index] for index in indices])

In [3]:
# Number of unique addition questions to generate (before the train/validation
# split).
TRAINING_SIZE = 50000
# Maximum number of digits in each operand.
DIGITS = 3
# When True, each padded question string is reversed before being encoded.
INVERT = True

# Longest possible question: DIGITS characters, the '+' sign, DIGITS characters.
MAXLEN = DIGITS + 1 + DIGITS

In [4]:
# Alphabet of the task: the ten digits, the '+' sign, and ' ' used as padding.
chars = '0123456789+ '
# Shared encoder/decoder between strings and one-hot matrices.
ctable = CharacterTable(chars)

In [5]:
questions = []  # padded (and optionally reversed) question strings
expected = []  # padded answer strings, aligned by position with `questions`
seen = set()  # sorted (a, b) operand pairs already generated, for de-duplication

In [6]:
def _random_operand():
    """Return a random non-negative integer with at most DIGITS digits.

    A length between 1 and DIGITS is drawn first, then that many decimal
    digits; leading zeros disappear when the digit string goes through int().
    """
    n_digits = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(list('0123456789'))
                       for _ in range(n_digits)))


# Operands range over 0 .. 10**DIGITS - 1 and (a, b) / (b, a) count as the
# same question, so only n * (n + 1) / 2 distinct questions exist. Fail fast
# instead of looping forever when more than that are requested.
_n_values = 10 ** DIGITS
if TRAINING_SIZE > _n_values * (_n_values + 1) // 2:
    raise ValueError('TRAINING_SIZE exceeds the number of unique questions '
                     'available for DIGITS={}'.format(DIGITS))

while len(questions) < TRAINING_SIZE:
    a, b = _random_operand(), _random_operand()
    # Skip any addition question we have already seen. Sorting the operands
    # makes a+b and b+a share one key, so the mirrored question is skipped too.
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    # Pad the question with spaces so that it is always MAXLEN characters.
    q = '{}+{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    ans = str(a + b)
    # Answers are at most DIGITS + 1 characters long; pad up to that length.
    ans += ' ' * (DIGITS + 1 - len(ans))
    if INVERT:
        # Reverse the query, e.g. '12+345 ' becomes ' 543+21'. (Note the
        # space used for padding.)
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
# A single pre-formatted string prints the same on Python 2 and 3 (a
# multi-argument print() call shows up as a tuple under Python 2).
print('Total addition questions: {}'.format(len(questions)))

('Total addition questions:', 50000)


In [7]:
# One-hot encode every question and answer into boolean tensors of shape
# (samples, timesteps, alphabet size).
# The builtin `bool` is used as the dtype: the `np.bool` alias was deprecated
# in NumPy 1.20 and removed in NumPy 1.24, where it raises AttributeError.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

In [8]:
# Shuffle questions and answers with one shared permutation so that x[i]
# still belongs to y[i] afterwards.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

In [9]:
# Hold out the last 10% of the samples for validation.
n_val = len(x) // 10
split_at = len(x) - n_val
x_train, y_train = x[:split_at], y[:split_at]
x_val, y_val = x[split_at:], y[split_at:]

# Report the shapes of both partitions.
for heading, inputs, targets in (('Training Data:', x_train, y_train),
                                 ('Validation Data:', x_val, y_val)):
    print(heading)
    print(inputs.shape)
    print(targets.shape)

Training Data:
(45000, 7, 12)
(45000, 4, 12)
Validation Data:
(5000, 7, 12)
(5000, 4, 12)


In [10]:
# Recurrent layer class used for both the encoder and the decoder.
RNN = layers.LSTM
# Number of units in each recurrent layer.
HIDDEN_SIZE = 128
# Mini-batch size passed to model.fit.
BATCH_SIZE = 128
# Number of stacked recurrent layers on the decoder side.
LAYERS = 1

In [11]:
print('Build model...')
n_symbols = len(chars)

model = Sequential()

# Encoder: read the whole question and summarise it as a single vector of
# size HIDDEN_SIZE.
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, n_symbols)))

# Feed that summary to the decoder once per output character
# (answers are DIGITS + 1 characters long).
model.add(layers.RepeatVector(DIGITS + 1))

# Decoder: LAYERS stacked recurrent layers, each returning the full sequence
# so that every output timestep gets its own hidden state.
for _layer_index in range(LAYERS):
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))

# Per-timestep classifier over the alphabet, followed by a softmax.
model.add(layers.TimeDistributed(layers.Dense(n_symbols)))
model.add(layers.Activation('softmax'))

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               72192     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 12)             1548      
_________________________________________________________________
activation_1 (Activation)    (None, 4, 12)             0         
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Train one epoch at a time (iterations 1..19) and, after each epoch, print a
# few validation predictions so progress can be inspected by eye.
# Every print() call receives a single string so that the output looks the
# same on Python 2 and 3 (multi-argument / empty print() calls render as
# tuples under Python 2).
for iteration in range(1, 20):
    print('')
    print('-' * 50)
    print('Iteration {}'.format(iteration))
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    # Select 10 samples from the validation set at random so we can visualize
    # errors.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Slice instead of indexing to keep the leading batch dimension.
        rowx, rowy = x_val[ind:ind + 1], y_val[ind:ind + 1]
        # Sequential.predict_classes() no longer exists in current Keras;
        # the argmax over the softmax output yields the same class indices.
        preds = model.predict(rowx, verbose=0).argmax(axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        # Questions were reversed when INVERT is set; undo that for display.
        print('Q {}'.format(q[::-1] if INVERT else q))
        print('T {}'.format(correct))
        print(guess)
        print('---')

()
--------------------------------------------------
('Iteration', 1)
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
('Q', '121+517')
('T', '638 ')
102 
---
('Q', '42+338 ')
('T', '380 ')
122 
---
('Q', '869+68 ')
('T', '937 ')
100 
---
('Q', '170+691')
('T', '861 ')
102 
---
('Q', '15+393 ')
('T', '408 ')
101 
---
('Q', '75+288 ')
('T', '363 ')
109 
---
('Q', '47+313 ')
('T', '360 ')
121 
---
('Q', '918+715')
('T', '1633')
1009
---
('Q', '763+22 ')
('T', '785 ')
121 
---
('Q', '26+557 ')
('T', '583 ')
122 
---
()
--------------------------------------------------
('Iteration', 2)
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
('Q', '431+67 ')
('T', '498 ')
333 
---
('Q', '54+696 ')
('T', '750 ')
603 
---
('Q', '923+503')
('T', '1426')
1333
---
('Q', '583+9  ')
('T', '592 ')
13  
---
('Q', '419+98 ')
('T', '517 ')
903 
---
('Q', '371+53 ')
('T', '424 ')
333 
---
('Q', '254+4  ')
('T', '258 ')
33  
---
('Q', '16+663 ')
('T', '679 ')
663 
---
('Q', '9+644  ')
('T

('Q', '696+226')
('T', '922 ')
922 
---
('Q', '354+89 ')
('T', '443 ')
443 
---
('Q', '15+675 ')
('T', '690 ')
690 
---
('Q', '553+53 ')
('T', '606 ')
606 
---
('Q', '62+24  ')
('T', '86  ')
86  
---
('Q', '56+699 ')
('T', '755 ')
755 
---
('Q', '419+40 ')
('T', '459 ')
459 
---
('Q', '873+90 ')
('T', '963 ')
963 
---
('Q', '915+6  ')
('T', '921 ')
921 
---
('Q', '12+544 ')
('T', '556 ')
556 
---
()
--------------------------------------------------
('Iteration', 15)
Train on 45000 samples, validate on 5000 samples
Epoch 1/1
('Q', '9+597  ')
('T', '606 ')
606 
---
('Q', '71+24  ')
('T', '95  ')
95  
---
('Q', '45+722 ')
('T', '767 ')
767 
---
('Q', '975+92 ')
('T', '1067')
1067
---
('Q', '21+926 ')
('T', '947 ')
947 
---
('Q', '0+129  ')
('T', '129 ')
110 
---
('Q', '4+550  ')
('T', '554 ')
553 
---
('Q', '442+346')
('T', '788 ')
788 
---
('Q', '259+510')
('T', '769 ')
769 
---
('Q', '32+58  ')
('T', '90  ')
90  
---
()
--------------------------------------------------
('Iteration', 1