In [1]:
import random
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from keras.models import Sequential
from keras.layers import LSTM, TimeDistributed, Dense, RepeatVector

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


** Generate Sum Pairs **

In [2]:
# generate lists of random integers and their sum
def random_sum_pairs(n_examples, n_numbers, largest):
    X, y = [], []
    for i in range(n_examples):
        in_pattern = [random.randint(1, largest) for _ in range(n_numbers)]
        out_pattern = sum(in_pattern)
        
        X.append(in_pattern)
        y.append(out_pattern)
    return X, y

In [3]:
random.seed(1)
n_samples = 2
n_numbers = 2
largest = 10

X, y = random_sum_pairs(n_samples, n_numbers, largest)
print(X, y)

[[3, 10], [2, 5]] [13, 7]


** Integers to Padded Strings **

In [4]:
# pad and convert data to strings
def to_string(X, y, n_numbers, largest):
    max_length = int(n_numbers * np.ceil(np.log10(largest+1)) + n_numbers - 1)
    Xstr = []
    for pattern in X:
        strp = '+'.join([str(n) for n in pattern])
        strp = ''.join([' ' for _ in range(max_length-len(strp))]) + strp
        Xstr.append(strp)
    
    max_length = int(np.ceil(np.log10(n_numbers * (largest + 1))))
    ystr = []
    
    for pattern in y:
        strp = str(pattern)
        strp = ''.join([' ' for _ in range(max_length - len(strp))]) + strp
        ystr.append(strp)
    return Xstr, ystr


In [5]:
# generate pairs
X, y = random_sum_pairs(n_samples, n_numbers, largest)
print(X, y)
# convert to strings
X, y = to_string(X, y, n_numbers, largest)
print(X, y)

[[2, 8], [8, 8]] [10, 16]
['  2+8', '  8+8'] ['10', '16']


** Integer Encoded Sequences **

In [6]:
# integer encode strings
def integer_encode(X, y, alphabet):
    char_to_int = dict((c, i) for i, c in enumerate(alphabet))
    Xenc = []
    for pattern in X:
        integer_encoded = [char_to_int[char] for char in pattern]
        Xenc.append(integer_encoded)
    
    yenc = []
    for pattern in y:
        integer_encoded = [char_to_int[char] for char in pattern]
        yenc.append(integer_encoded)
    return Xenc, yenc

In [7]:
# generate pairs
X, y = random_sum_pairs(n_samples, n_numbers, largest)
print(X, y)
X, y = to_string(X, y, n_numbers, largest)
print(X, y)

# integer encode
alphabet = list('1234567890 +')
X, y = integer_encode(X, y, alphabet)
print(X, y)

[[7, 4], [2, 8]] [11, 10]
['  7+4', '  2+8'] ['11', '10']
[[10, 10, 6, 11, 3], [10, 10, 1, 11, 7]] [[0, 0], [0, 9]]


** One Hot Encoded Sequences **

In [8]:
index_values = np.array(range(len(alphabet))).reshape(-1, 1)
encoder = OneHotEncoder(sparse=False).fit(index_values)

def one_hot_encode(X, y):
    Xenc = np.array([encoder.transform(np.array(vector).reshape(len(vector), 1)) for vector in X])
    yenc = np.array([encoder.transform(np.array(vector).reshape(len(vector), 1)) for vector in y])
    return Xenc, yenc

In [9]:
# generate pairs
X, y = random_sum_pairs(n_samples, n_numbers, largest)
print(X, y)
X, y = to_string(X, y, n_numbers, largest)
print(X, y)

# integer encode
alphabet = list('1234567890 +')
X, y = integer_encode(X, y, alphabet)
print(X, y)

# one hot encode
X, y = one_hot_encode(X, y)
print(X, y)

[[1, 7], [7, 10]] [8, 17]
['  1+7', ' 7+10'] [' 8', '17']
[[10, 10, 0, 11, 6], [10, 6, 11, 0, 9]] [[10, 7], [0, 6]]
[[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
  [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
  [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]]] [[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
  [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]

 [[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]]


** Sequence Generation Pipeline **

In [10]:
# generate an encoded dataset
def generate_data(n_samples, n_numbers, largest, alphabet):
    # generate pairs
    X, y = random_sum_pairs(n_samples, n_numbers, largest)
    # convert to strings
    X, y = to_string(X, y, n_numbers, largest)
    # integer encode
    X, y = integer_encode(X, y, alphabet)
    # one hot encode
    X, y = one_hot_encode(X, y)
    
    return X, y

** Decode Sequences **

In [11]:
# inbert encoding
def invert(seq, alphabet):
    int_to_char = dict((i, c) for i, c in enumerate(alphabet))
    strings = ''.join([int_to_char(np.argmax(vector)) for vector in seq])
    return strings

** Define and Compile the Model **

In [12]:
# number of math terms
n_terms = 3
# largest value for any single input digit
largest = 10
# scope of possible symbols for each input or output time step
alphabet = list('0123456789 +')

# size of alphabet
n_chars = len(alphabet)
# length of input sequence
n_in_seq_length = int(n_terms * np.ceil(np.log10(largest+1)) + n_terms - 1)
# length of encoded output sequence
n_out_seq_length = int(np.ceil(np.log10(n_terms * (largest+1))))

In [13]:
# define LSTM model
model = Sequential()
model.add(LSTM(75, input_shape=(n_in_seq_length, n_chars)))
model.add(RepeatVector(n_out_seq_length))
model.add(LSTM(50, return_sequences=True))
model.add(TimeDistributed(Dense(n_chars, activation='softmax')))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 75)                26400     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 2, 75)             0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 2, 50)             25200     
_________________________________________________________________
time_distributed_1 (TimeDist (None, 2, 12)             612       
Total params: 52,212
Trainable params: 52,212
Non-trainable params: 0
_________________________________________________________________
None


** Fit the Model **

In [14]:
# fit LSTM
X, y = generate_data(75000, n_terms, largest, alphabet)
model.fit(X, y, epochs=1, batch_size=32)

Epoch 1/1


<keras.callbacks.History at 0x7fb4dbfeaef0>

** Evaluate the Model **

In [15]:
# evaluate LSTM
X, y = generate_data(100, n_terms, largest, alphabet)
loss, acc =  model.evaluate(X, y, verbose=0)
print('Loss: %f, Accuracy: %f' % (loss, acc * 100))

Loss: 0.173809, Accuracy: 98.500000


** Make Predictions with the Model **

In [16]:
# predict
for _ in range(10):
    # generate an input_output pair
    X, y = generate_data(1, n_terms, largest, alphabet)
    # make prediction
    yhat =  model.predict(X, verbose=0)
    #decode input, expected and predicted
    in_seq = invert(X[0], alphabet)
    out_seq = invert(y[0], alphabet)
    predicted = invert(yhat[0], alphabet)
    print('%s = %s  (expect %s)' % (in_seq, predicted, out_seq))

TypeError: 'dict' object is not callable