# Sequence to sequence learning for performing number addition

##### Importing the libraries

In [82]:
from random import randint,seed
from numpy  import array,argmax
from math   import ceil,log10,sqrt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.layers import RepeatVector
import matplotlib.pyplot as plt

##### function to generate lists of random integers and their sum

In [67]:
def random_sum_pairs(n_examples, n_numbers, largest):
    """Build random addition problems together with their answers.

    Args:
        n_examples: how many problems to generate.
        n_numbers: how many terms each problem contains.
        largest: inclusive upper bound for every term (terms are drawn
            with ``randint(1, largest)``).

    Returns:
        A tuple ``(X, y)`` where ``X`` is a list of ``n_examples`` lists
        of ``n_numbers`` integers and ``y`` is the list of their sums.
    """
    # Terms are drawn example by example, so the sequence of randint()
    # calls is the same as a plain nested loop.
    problems = [
        [randint(1, largest) for _ in range(n_numbers)]
        for _ in range(n_examples)
    ]
    answers = [sum(terms) for terms in problems]
    return problems, answers

In [68]:
#X,y = random_sum_pairs(10,2,100)
#print('X :' + str(X))
#print('y :' + str(y))

##### function to convert data to strings

In [69]:
# convert data to strings
def to_string(X, y, n_numbers, largest):
    """Render addition problems and their sums as fixed-width strings.

    Args:
        X: iterable of integer sequences (the terms of each problem).
        y: iterable of integers (the sum of each problem).
        n_numbers: number of terms per problem, used to size the padding.
        largest: largest possible term, used to size the padding.

    Returns:
        A tuple ``(Xstr, ystr)``. Each problem becomes its terms joined
        by ``'+'`` and left-padded with spaces; each sum becomes its
        decimal string, also left-padded with spaces. A string that is
        already wider than the computed width is left unchanged.
    """
    # Width of the widest problem: every term at full digit count plus
    # the '+' separators between them.
    in_width = n_numbers * ceil(log10(largest + 1)) + n_numbers - 1
    # Width reserved for the sum.
    out_width = ceil(log10(n_numbers * (largest + 1)))

    Xstr = ['+'.join(map(str, terms)).rjust(in_width) for terms in X]
    ystr = [str(total).rjust(out_width) for total in y]
    return Xstr, ystr

In [70]:
#Xstr,ystr = to_string(X,y,2,100)
#Xstr,ystr

##### function to integer-encode the strings (map each character to its index in the alphabet)

In [71]:
def integer_encode(X, y, alphabet):
    """Replace every character by its position in ``alphabet``.

    Args:
        X: iterable of input strings.
        y: iterable of output strings.
        alphabet: sequence of characters; a character's index in this
            sequence is its integer code.

    Returns:
        A tuple ``(Xenc, yenc)`` of lists of integer lists, one inner
        list per string.

    Raises:
        KeyError: if a string contains a character not in ``alphabet``.
    """
    code_of = {symbol: position for position, symbol in enumerate(alphabet)}

    def encode_all(texts):
        # One list of integer codes per string.
        return [[code_of[symbol] for symbol in text] for text in texts]

    return encode_all(X), encode_all(y)

##### One Hot encoder

In [72]:
 
def one_hot_encode(X, y, max_int):
    """One-hot encode integer sequences.

    Args:
        X: iterable of integer sequences (inputs).
        y: iterable of integer sequences (outputs).
        max_int: length of every one-hot vector.

    Returns:
        A tuple ``(Xenc, yenc)``. Every integer is replaced by a list of
        ``max_int`` zeros with a single 1 at that integer's index.

    Raises:
        IndexError: if an integer is not a valid index into a list of
            length ``max_int``.
    """
    def encode_all(sequences):
        encoded = []
        for sequence in sequences:
            rows = []
            for position in sequence:
                row = [0] * max_int
                row[position] = 1
                rows.append(row)
            encoded.append(rows)
        return encoded

    return encode_all(X), encode_all(y)

### Function to generate the dataset. The data generation pipeline consists of:
##### 1. Generating the random integers and their sum
##### 2. Converting the integers to padded strings
##### 3. Integer-encoding the characters of each string
##### 4. One-hot encoding the integer codes


In [73]:
# generate an encoded dataset
def generate_data(n_samples, n_numbers, largest, alphabet):
    """Generate a freshly sampled, one-hot encoded addition dataset.

    Runs the full pipeline: random problems -> padded strings ->
    integer codes -> one-hot vectors -> numpy arrays.

    Args:
        n_samples: number of problems to generate.
        n_numbers: number of terms per problem.
        largest: largest possible term.
        alphabet: characters used for encoding; its length is the
            one-hot vector size.

    Returns:
        A tuple ``(X, y)`` of numpy arrays built from the nested one-hot
        lists of the problems and of their sums.
    """
    terms, sums = random_sum_pairs(n_samples, n_numbers, largest)
    text_in, text_out = to_string(terms, sums, n_numbers, largest)
    codes_in, codes_out = integer_encode(text_in, text_out, alphabet)
    hot_in, hot_out = one_hot_encode(codes_in, codes_out, len(alphabet))
    return array(hot_in), array(hot_out)

##### Function to invert the encoding

In [74]:
# invert encoding
def invert(seq, alphabet):
    """Decode a sequence of one-hot (or probability) vectors to a string.

    Args:
        seq: iterable of vectors; the index of each vector's largest
            entry (via ``argmax``) selects a character.
        alphabet: sequence of characters indexed by integer code.

    Returns:
        The decoded characters concatenated into one string.
    """
    symbol_at = dict(enumerate(alphabet))
    return ''.join(symbol_at[argmax(vector)] for vector in seq)
 

## Variables of the dataset
##### no. of samples in dataset : 50000
##### no. of integers per example : 3
##### largest integer : 250

In [75]:
# define dataset
# Seed the random module so the generated problems are reproducible.
seed(1)

# Size of the problem space.
n_samples = 50000
n_numbers = 3
largest = 250

# Every character that can appear in an encoded string:
# the ten digits, the '+' separator and the padding space.
alphabet = list('0123456789+ ')
n_chars = len(alphabet)

# Input width: each term at full digit count followed by one separator,
# minus the separator after the last term.
n_in_seq_length = n_numbers * (ceil(log10(largest + 1)) + 1) - 1
# Output width: characters reserved for the sum.
n_out_seq_length = ceil(log10((largest + 1) * n_numbers))

## MODEL BUILDING

In [78]:
# define LSTM configuration
n_batch = 10
n_epoch = 100
# create LSTM
# Encoder-decoder layout:
#   1. the first LSTM reads the whole one-hot input sequence and emits a
#      single 128-dimensional vector,
#   2. RepeatVector copies that vector once per output character,
#   3. the second LSTM (return_sequences=True) produces one state per
#      output character,
#   4. the Dense softmax maps each state to a distribution over the
#      n_chars characters of the alphabet.
model = Sequential()
model.add(LSTM(128, input_shape=(n_in_seq_length, n_chars)))
model.add(RepeatVector(n_out_seq_length))
model.add(LSTM(128, return_sequences=True))
model.add(Dense(n_chars, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])   #learning_rate = 0.001
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_14 (LSTM)               (None, 128)               72192     
_________________________________________________________________
repeat_vector_7 (RepeatVecto (None, 3, 128)            0         
_________________________________________________________________
lstm_15 (LSTM)               (None, 3, 128)            131584    
_________________________________________________________________
dense_7 (Dense)              (None, 3, 12)             1548      
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________
None


##### MODEL TRAINING

In [79]:
# train LSTM
# Run exactly n_epoch passes (range's upper bound is exclusive, so it
# needs the +1), each on a freshly generated dataset.
for epoch in range(1, n_epoch + 1):
    X, y = generate_data(n_samples, n_numbers, largest, alphabet)
    print(epoch)
    model.fit(X, y, epochs=1, batch_size=n_batch)

    # evaluate on some new patterns
    X, y = generate_data(n_samples, n_numbers, largest, alphabet)
    result = model.predict(X, batch_size=n_batch, verbose=0)
    # decode targets and predictions back to strings
    expected = [invert(x, alphabet) for x in y]
    predicted = [invert(x, alphabet) for x in result]
    # show some examples; the inner loop uses its own names so it does
    # not overwrite the epoch counter
    for want, got in zip(expected[:10], predicted[:10]):
        print('Expected=%s, Predicted=%s' % (want, got))

1
Expected=413, Predicted=422
Expected=359, Predicted=372
Expected=264, Predicted=282
Expected=361, Predicted=372
Expected=100, Predicted=125
Expected=321, Predicted=332
Expected=443, Predicted=452
Expected=523, Predicted=534
Expected=397, Predicted=417
Expected=464, Predicted=472
2
Expected=262, Predicted=251
Expected=584, Predicted=589
Expected=571, Predicted=564
Expected=432, Predicted=424
Expected=340, Predicted=334
Expected=377, Predicted=384
Expected=359, Predicted=364
Expected=451, Predicted=454
Expected=414, Predicted=414
Expected=179, Predicted=171
3
Expected=194, Predicted=193
Expected=417, Predicted=413
Expected=172, Predicted=173
Expected=397, Predicted=393
Expected=327, Predicted=323
Expected=414, Predicted=413
Expected=410, Predicted=413
Expected=120, Predicted=111
Expected=425, Predicted=423
Expected=309, Predicted=303
4
Expected=291, Predicted=283
Expected=251, Predicted=253
Expected=548, Predicted=552
Expected=487, Predicted=482
Expected=333, Predicted=333
Expected=250

In [80]:
# Continue training the same model for 20 more passes, each on a
# freshly generated dataset.
for epoch in range(1, 21):
    X, y = generate_data(n_samples, n_numbers, largest, alphabet)
    print(epoch)
    model.fit(X, y, epochs=1, batch_size=n_batch)

    # evaluate on some new patterns
    X, y = generate_data(n_samples, n_numbers, largest, alphabet)
    result = model.predict(X, batch_size=n_batch, verbose=0)
    # decode targets and predictions back to strings
    expected = [invert(x, alphabet) for x in y]
    predicted = [invert(x, alphabet) for x in result]
    # show some examples; the inner loop uses its own names so it does
    # not overwrite the epoch counter
    for want, got in zip(expected[:10], predicted[:10]):
        print('Expected=%s, Predicted=%s' % (want, got))

1
Expected=247, Predicted=247
Expected=563, Predicted=561
Expected=174, Predicted=175
Expected=295, Predicted=295
Expected=441, Predicted=442
Expected= 90, Predicted= 98
Expected=356, Predicted=357
Expected=197, Predicted=197
Expected=336, Predicted=336
Expected=311, Predicted=312
2
Expected=215, Predicted=216
Expected=312, Predicted=312
Expected=384, Predicted=383
Expected=322, Predicted=322
Expected=295, Predicted=296
Expected=552, Predicted=553
Expected=271, Predicted=271
Expected=424, Predicted=424
Expected=498, Predicted=499
Expected=364, Predicted=364
3
Expected=367, Predicted=368
Expected=341, Predicted=340
Expected=429, Predicted=429
Expected=205, Predicted=205
Expected=415, Predicted=415
Expected=601, Predicted=601
Expected=285, Predicted=285
Expected=328, Predicted=328
Expected=114, Predicted=113
Expected=527, Predicted=527
4
Expected=184, Predicted=183
Expected=322, Predicted=322
Expected=463, Predicted=463
Expected=328, Predicted=328
Expected=368, Predicted=367
Expected=284

['loss', 'accuracy']