In [24]:
from random import seed
from random import randint
from numpy import array
from math import ceil
from math import log10
from math import sqrt
from numpy import argmax
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import TimeDistributed
from keras.layers import RepeatVector

In [25]:
# generate lists of random integers and their sum
def random_sum_pairs(n_examples, n_numbers, largest):
    """Build random addition problems and their answers.

    Returns a tuple (X, y): X is a list of n_examples lists, each holding
    n_numbers integers drawn with randint(1, largest); y holds the sum of
    each of those lists, in the same order.
    """
    # draws happen example by example, operand by operand
    X = [
        [randint(1, largest) for _operand in range(n_numbers)]
        for _example in range(n_examples)
    ]
    y = [sum(operands) for operands in X]
    return X, y

In [26]:
# quick look at the raw data: 5 examples of 2 addends, each drawn from 1..100
X, y = random_sum_pairs(5, 2, 100)

X, y

([[90, 62], [52, 21], [68, 29], [59, 43], [52, 60]], [152, 73, 97, 102, 112])

In [27]:
# convert data to strings
def to_string(X, y, n_numbers, largest):
    """Turn operand lists and their sums into fixed-width strings.

    Every entry of X is joined with '+' and every entry of y is converted
    with str(); both are padded on the left with spaces up to a width
    computed from n_numbers and largest. Strings that are already at least
    that wide are left untouched. Returns (Xstr, ystr).
    """
    # width of n_numbers operands of up to `largest` plus the '+' separators
    in_width = n_numbers * ceil(log10(largest+1)) + n_numbers - 1
    Xstr = ['+'.join(map(str, operands)).rjust(in_width) for operands in X]
    # width reserved for the sum
    out_width = ceil(log10(n_numbers * (largest+1)))
    ystr = [str(total).rjust(out_width) for total in y]
    return Xstr, ystr

In [28]:
# stringify the sample pairs; n_numbers/largest must match the generating call above
# NOTE: X, y are rebound here, so re-running this cell alone feeds the
# already-converted strings back into to_string
X, y = to_string(X, y, 2, 100)

X, y

(['  90+62', '  52+21', '  68+29', '  59+43', '  52+60'],
 ['152', ' 73', ' 97', '102', '112'])

In [29]:
# integer encode strings
def integer_encode(X, y, alphabet):
    """Replace every character by its position in `alphabet`.

    X and y are iterables of strings; the result is (Xenc, yenc), two lists
    of integer lists. A character missing from `alphabet` raises KeyError.
    """
    codes = {symbol: position for position, symbol in enumerate(alphabet)}
    Xenc = [[codes[ch] for ch in text] for text in X]
    yenc = [[codes[ch] for ch in text] for text in y]
    return Xenc, yenc

In [30]:
# symbol vocabulary: the ten digits, the '+' separator and the ' ' pad character;
# a symbol's list position is the integer code assigned by integer_encode
alphabet = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', ' ']

alphabet

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', ' ']

In [31]:
# map each character of the padded strings to its index in `alphabet`
X, y = integer_encode(X, y, alphabet)

X, y

([[11, 11, 9, 0, 10, 6, 2],
  [11, 11, 5, 2, 10, 2, 1],
  [11, 11, 6, 8, 10, 2, 9],
  [11, 11, 5, 9, 10, 4, 3],
  [11, 11, 5, 2, 10, 6, 0]],
 [[1, 5, 2], [11, 7, 3], [11, 9, 7], [1, 0, 2], [1, 1, 2]])

In [32]:
# one hot encode
def one_hot_encode(X, y, max_int):
    """One-hot encode two collections of integer sequences.

    Each integer becomes a list of max_int zeros with a single 1 at that
    integer's position. Returns (Xenc, yenc) as nested Python lists with the
    same outer structure as X and y.
    """
    def encode_all(sequences):
        # shared by X and y: sequence -> list of one-hot rows
        encoded = []
        for seq in sequences:
            rows = []
            for index in seq:
                row = [0] * max_int
                row[index] = 1
                rows.append(row)
            encoded.append(rows)
        return encoded

    return encode_all(X), encode_all(y)

In [33]:
# X, y still hold the integer-encoded sequences at this point
X, y

([[11, 11, 9, 0, 10, 6, 2],
  [11, 11, 5, 2, 10, 2, 1],
  [11, 11, 6, 8, 10, 2, 9],
  [11, 11, 5, 9, 10, 4, 3],
  [11, 11, 5, 2, 10, 6, 0]],
 [[1, 5, 2], [11, 7, 3], [11, 9, 7], [1, 0, 2], [1, 1, 2]])

In [34]:
# one-hot width equals the alphabet size, so every symbol code gets its own slot
X, y = one_hot_encode(X, y, len(alphabet))

In [35]:
# generate an encoded dataset
def generate_data(n_samples, n_numbers, largest, alphabet):
    """Produce a fresh one-hot encoded batch of addition problems.

    Chains random_sum_pairs -> to_string -> integer_encode -> one_hot_encode
    and returns the encoded inputs and targets as a pair of numpy arrays.
    """
    operands, sums = random_sum_pairs(n_samples, n_numbers, largest)
    operand_text, sum_text = to_string(operands, sums, n_numbers, largest)
    operand_codes, sum_codes = integer_encode(operand_text, sum_text, alphabet)
    operand_hot, sum_hot = one_hot_encode(operand_codes, sum_codes, len(alphabet))
    return array(operand_hot), array(sum_hot)

In [36]:
# invert encoding
def invert(seq, alphabet):
    """Decode a sequence of score vectors back into a string.

    For each vector in seq the position of its largest value (argmax) is
    looked up in `alphabet`; the resulting symbols are concatenated.
    """
    symbol_at = dict(enumerate(alphabet))
    return ''.join(symbol_at[argmax(vector)] for vector in seq)

In [53]:
# define dataset
seed(1)
n_samples = 1000
n_numbers = 2
largest = 1000
alphabet = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', ' ']
n_chars = len(alphabet)
# these two widths repeat the padding formulas inside to_string(); they must
# stay in sync or the encoded arrays will not fit the model's input/output
n_in_seq_length = n_numbers * ceil(log10(largest+1)) + n_numbers - 1
n_out_seq_length = ceil(log10(n_numbers * (largest+1)))
# define LSTM configuration
n_batch = 10
n_epoch = 30
# create LSTM: encoder LSTM -> repeat its output once per output step ->
# decoder LSTM -> per-step softmax over the alphabet
model = Sequential()
model.add(LSTM(100, input_shape=(n_in_seq_length, n_chars)))
model.add(RepeatVector(n_out_seq_length))
model.add(LSTM(50, return_sequences=True))
model.add(TimeDistributed(Dense(n_chars, activation='softmax')))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None; wrapping it in print()
# added a stray "None" line to the output
model.summary()


Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_12 (LSTM)              (None, 100)               45200     
                                                                 
 repeat_vector_6 (RepeatVec  (None, 4, 100)            0         
 tor)                                                            
                                                                 
 lstm_13 (LSTM)              (None, 4, 50)             30200     
                                                                 
 time_distributed_6 (TimeDi  (None, 4, 12)             612       
 stributed)                                                      
                                                                 
Total params: 76012 (296.92 KB)
Trainable params: 76012 (296.92 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [54]:

# train LSTM
# each pass draws a brand-new random dataset and fits on it for a single
# Keras epoch, so the model never sees the same batch of problems twice
for i in range(n_epoch):
    X, y = generate_data(n_samples, n_numbers, largest, alphabet)
    # progress marker: index of the current outer pass
    print(i)
    model.fit(X, y, epochs=1, batch_size=n_batch)
 


0


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29


In [55]:
# evaluate on some new patterns
X, y = generate_data(n_samples, n_numbers, largest, alphabet)
result = model.predict(X, batch_size=n_batch, verbose=0)
# decode targets and predictions back to strings
# (no error/accuracy figure is computed here; the examples below are compared by eye)
expected = [invert(x, alphabet) for x in y]
predicted = [invert(x, alphabet) for x in result]
# show some examples
for i in range(20):
    print('Expected=%s, Predicted=%s' % (expected[i], predicted[i]))

Expected= 510, Predicted= 666
Expected= 838, Predicted= 866
Expected=1602, Predicted=1588
Expected=1236, Predicted=1238
Expected=1670, Predicted=1688
Expected= 349, Predicted= 666
Expected=1365, Predicted=1368
Expected=1336, Predicted=1338
Expected= 833, Predicted= 119
Expected=1405, Predicted=1488
Expected= 811, Predicted= 866
Expected=1648, Predicted=1688
Expected=1016, Predicted=1009
Expected= 933, Predicted=1999
Expected=1015, Predicted=1009
Expected=1522, Predicted=1588
Expected=1068, Predicted=1109
Expected= 996, Predicted= 999
Expected=1606, Predicted=1688
Expected=1616, Predicted=1588


In [40]:
# first encoded input/target pair
# NOTE(review): this cell's execution count (40) precedes the dataset cell (53),
# and the recorded output has 5 input steps and 2 target steps, which does not
# match the widths implied by largest = 1000 -- the output looks stale; re-run
X[0], y[0]

(array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
        [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]]),
 array([[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]]))

In [41]:
# round-trip check: decode the first one-hot input and target back to text
# NOTE(review): executed as In [41], before the current dataset settings were
# defined; the recorded result likely comes from an earlier configuration
invert(X[0], alphabet), invert(y[0], alphabet)

('  8+5', '13')