In [14]:
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
import numpy as np
from six.moves import range
import argparse
import time

In [15]:
class CharacterTable(object):
    """Given a set of characters:
    + Encode them to a one hot integer representation
    + Decode the one hot integer representation to their character output
    + Decode a vector of probabilities to their character output
    """
    def __init__(self, chars):
        """Initialize character table.
        # Arguments
            chars: Characters that can appear in the input.
        """
        self.chars = sorted(set(chars))
        self.char_indices = dict((c, i) for i, c in enumerate(self.chars))
        self.indices_char = dict((i, c) for i, c in enumerate(self.chars))

    def encode(self, C, num_rows):
        """One hot encode given string C.
        # Arguments
            num_rows: Number of rows in the returned one hot encoding. This is
                used to keep the # of rows for each data the same.
        """
        x = np.zeros((num_rows, len(self.chars)))
        for i, c in enumerate(C):
            x[i, self.char_indices[c]] = 1
        return x

    def decode(self, x, calc_argmax=True):
        if calc_argmax:
            x = x.argmax(axis=-1)
        return ''.join(self.indices_char[x] for x in x)

In [16]:

args = {
    'verbose':False,
}

verbose = 1 if args['verbose'] else 0
impl = 2

print("Starting:", time.ctime())

############################################
# Data

# Parameters for the model and dataset.
TRAINING_SIZE = 50000
DIGITS = 4
INVERT = True

# Maximum length of input is 'int + int' (e.g., '345+678'). Maximum length of
# int is DIGITS.
MAXLEN = DIGITS + 1 + DIGITS

# All the numbers, plus sign and space for padding.
chars = '0123456789+ '
ctable = CharacterTable(chars)

questions = []
expected = []
seen = set()
print('Generating data...')
while len(questions) < TRAINING_SIZE:
    f = lambda: int(''.join(np.random.choice(list('0123456789'))
                    for i in range(np.random.randint(1, DIGITS + 1))))
    a, b = f(), f()
    # Skip any addition questions we've already seen
    # Also skip any such that x+Y == Y+x (hence the sorting).
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    # Pad the data with spaces such that it is always MAXLEN.
    q = '{}+{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    ans = str(a + b)
    # Answers can be of maximum size DIGITS + 1.
    ans += ' ' * (DIGITS + 1 - len(ans))
    if INVERT:
        # Reverse the query, e.g., '12+345  ' becomes '  543+21'. (Note the
        # space used for padding.)
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total addition questions:', len(questions))

print('Vectorization...')
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=np.bool)
y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=np.bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

# Shuffle (x, y) in unison as the later parts of x will almost all be larger
# digits.
indices = np.arange(len(y))
np.random.shuffle(indices)
x = x[indices]
y = y[indices]

# Explicitly set apart 10% for validation data that we never train over.
split_at = len(x) - len(x) // 10
(x_train, x_val) = x[:split_at], x[split_at:]
(y_train, y_val) = y[:split_at], y[split_at:]

print('Training Data:')
print(x_train.shape)
print(y_train.shape)

print('Validation Data:')
print(x_val.shape)
print(y_val.shape)

############################################
# Model

# Try replacing GRU, or SimpleRNN.
RNN = layers.LSTM
HIDDEN_SIZE = 128
BATCH_SIZE = 128
LAYERS = 1
Dropout = 0.0

print('Build model...')
model = Sequential()
# "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE.
# Note: In a situation where your input sequences have a variable length,
# use input_shape=(None, num_feature).
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars)), recurrent_dropout=Dropout, implementation=impl))
# As the decoder RNN's input, repeatedly provide with the last hidden state of
# RNN for each time step. Repeat 'DIGITS + 1' times as that's the maximum
# length of output, e.g., when DIGITS=3, max output is 999+999=1998.
model.add(layers.RepeatVector(DIGITS + 1))
# The decoder RNN could be multiple layers stacked or a single layer.
for _ in range(LAYERS):
    # By setting return_sequences to True, return not only the last output but
    # all the outputs so far in the form of (num_samples, timesteps,
    # output_dim). This is necessary as TimeDistributed in the below expects
    # the first dimension to be the timesteps.
    model.add(RNN(HIDDEN_SIZE, return_sequences=True, recurrent_dropout=Dropout, implementation=impl))

# Apply a dense layer to the every temporal slice of an input. For each of step
# of the output sequence, decide which character should be chosen.
model.add(layers.TimeDistributed(layers.Dense(len(chars))))
model.add(layers.Activation('softmax'))

############################################
# Training

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

# Train the model each generation and show predictions against the validation
# dataset.

iterations = 50
for iteration in range(1, iterations + 1):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val), verbose=verbose)
    # Select 10 samples from the validation set at random so we can visualize
    # errors.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        preds = model.predict_classes(rowx, verbose=0)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if INVERT else q)
        print('T', correct)
        if correct == guess:
            print('+', end=" ")
        else:
            print('-', end=" ")
        print(guess)
    print('---')
print()
print("Ending:", time.ctime())

Starting: Mon Mar 25 20:40:20 2019
Generating data...
Total addition questions: 50000
Vectorization...
Training Data:
(45000, 9, 12)
(45000, 5, 12)
Validation Data:
(5000, 9, 12)
(5000, 5, 12)
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 128)               72192     
_________________________________________________________________
repeat_vector (RepeatVector) (None, 5, 128)            0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 5, 128)            131584    
_________________________________________________________________
time_distributed (TimeDistri (None, 5, 12)             1548      
_________________________________________________________________
activation (Activation)      (None, 5, 12)             0         
Total params: 205,324
Trainable params: 205,324
Non-trainable para

Q 35+947   
T 982  
+ 982  
Q 5123+95  
T 5218 
+ 5218 
Q 54+8634  
T 8688 
+ 8688 
Q 4+7636   
T 7640 
- 7630 
Q 875+4    
T 879  
+ 879  
Q 99+58    
T 157  
+ 157  
Q 680+8    
T 688  
+ 688  
Q 764+95   
T 859  
+ 859  
Q 975+695  
T 1670 
+ 1670 
Q 2634+2239
T 4873 
- 4842 
---

--------------------------------------------------
Iteration 22
Q 66+889   
T 955  
+ 955  
Q 5+108    
T 113  
+ 113  
Q 987+758  
T 1745 
+ 1745 
Q 50+66    
T 116  
+ 116  
Q 57+96    
T 153  
+ 153  
Q 95+927   
T 1022 
+ 1022 
Q 463+48   
T 511  
+ 511  
Q 1+493    
T 494  
+ 494  
Q 7643+248 
T 7891 
+ 7891 
Q 915+531  
T 1446 
+ 1446 
---

--------------------------------------------------
Iteration 23
Q 776+7410 
T 8186 
- 8196 
Q 1960+3609
T 5569 
- 5598 
Q 868+7761 
T 8629 
+ 8629 
Q 5259+0   
T 5259 
+ 5259 
Q 8095+108 
T 8203 
+ 8203 
Q 659+2    
T 661  
+ 661  
Q 49+28    
T 77   
+ 77   
Q 157+1712 
T 1869 
- 1879 
Q 4482+949 
T 5431 
+ 5431 
Q 7869+8   
T 7877 
+ 7877 
---

-----------------

Q 6345+7381
T 13726
- 13716
Q 75+5833  
T 5908 
+ 5908 
Q 2034+7574
T 9608 
+ 9608 
Q 6170+316 
T 6486 
+ 6486 
Q 1416+6   
T 1422 
+ 1422 
Q 1757+627 
T 2384 
+ 2384 
Q 508+2194 
T 2702 
+ 2702 
Q 6395+1   
T 6396 
+ 6396 
Q 7481+9   
T 7490 
+ 7490 
Q 3974+259 
T 4233 
+ 4233 
---

--------------------------------------------------
Iteration 46
Q 60+5     
T 65   
+ 65   
Q 5649+4   
T 5653 
+ 5653 
Q 334+87   
T 421  
+ 421  
Q 51+846   
T 897  
+ 897  
Q 5986+6   
T 5992 
+ 5992 
Q 29+185   
T 214  
+ 214  
Q 557+1710 
T 2267 
- 2257 
Q 86+20    
T 106  
+ 106  
Q 64+322   
T 386  
+ 386  
Q 341+65   
T 406  
+ 406  
---

--------------------------------------------------
Iteration 47
Q 2611+82  
T 2693 
+ 2693 
Q 5236+4   
T 5240 
+ 5240 
Q 62+470   
T 532  
+ 532  
Q 47+7885  
T 7932 
+ 7932 
Q 8815+488 
T 9303 
- 9203 
Q 3+465    
T 468  
+ 468  
Q 6+7209   
T 7215 
+ 7215 
Q 15+5     
T 20   
+ 20   
Q 254+48   
T 302  
- 301  
Q 69+40    
T 109  
+ 109  
---

-----------------

In [40]:
q = '{}+{}'.format(986, )
query = q + ' ' * (MAXLEN - len(q))
print(query)
query_encoded = ctable.encode(query, MAXLEN)
qd = ctable.decode(query_encoded)
print(query_encoded)
query_encoded = query_encoded.reshape(1,9,12)
print('Anem a predir la suma...')
predict = model.predict_classes(query_encoded)
print(predict)
result = ctable.decode(predict[0], calc_argmax=False)
print(result)
print('Q', q[::-1] if INVERT else q)

986+5803 
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
Anem a predir la suma...
[[6 5 9 8 0]]
4376 
Q 3085+689


In [26]:
print(x_train.shape)

(45000, 9, 12)


In [28]:
query_encoded = query_encoded.reshape(1,9,12)

In [30]:
print(query_encoded.shape)

(1, 9, 12)
