In [8]:
'''Sequence to sequence example in Keras (character-level).
This script demonstrates how to implement a basic character-level
sequence-to-sequence model. We apply it to translating
short English sentences into short French sentences,
character-by-character. Note that it is fairly unusual to
do character-level machine translation, as word-level
models are more common in this domain.
# Summary of the algorithm
- We start with input sequences from a domain (e.g. English sentences)
    and corresponding target sequences from another domain
    (e.g. French sentences).
- An encoder LSTM turns input sequences to 2 state vectors
    (we keep the last LSTM state and discard the outputs).
- A decoder LSTM is trained to turn the target sequences into
    the same sequence but offset by one timestep in the future,
    a training process called "teacher forcing" in this context.
    It uses as initial state the state vectors from the encoder.
    Effectively, the decoder learns to generate `targets[t+1...]`
    given `targets[...t]`, conditioned on the input sequence.
- In inference mode, when we want to decode unknown input sequences, we:
    - Encode the input sequence into state vectors
    - Start with a target sequence of size 1
        (just the start-of-sequence character)
    - Feed the state vectors and 1-char target sequence
        to the decoder to produce predictions for the next character
    - Sample the next character using these predictions
        (we simply use argmax).
    - Append the sampled character to the target sequence
    - Repeat until we generate the end-of-sequence character or we
        hit the character limit.
# Data download
English to French sentence pairs.
http://www.manythings.org/anki/fra-eng.zip
Lots of neat sentence pairs datasets can be found at:
http://www.manythings.org/anki/
# References
- Sequence to Sequence Learning with Neural Networks
    https://arxiv.org/abs/1409.3215
- Learning Phrase Representations using
    RNN Encoder-Decoder for Statistical Machine Translation
    https://arxiv.org/abs/1406.1078
'''
from __future__ import print_function

from keras.models import Model
from keras.layers import Input, LSTM, Dense
import numpy as np
import keras

batch_size = 36  # Batch size for training.
epochs = 100  # Number of epochs to train for.
latent_dim = 256  # Latent dimensionality of the encoding space.
num_samples = 36  # Number of samples to train on.


def _read_lines(path):
    """Return every line of the text file at `path`.

    The file is opened in a `with` block so the handle is closed as soon
    as the lines have been read, instead of staying open for the rest of
    the session.
    """
    with open(path, 'r') as handle:
        return handle.readlines()


# Each corpus is loaded eagerly into a list of lines. The code that
# consumes these names only iterates over them line by line, which works
# the same for a list as for an open file object.
input_sentences = _read_lines('bAbI_questions.txt')
info_sentences = _read_lines('bAbI_info.txt')
relation_sentences = _read_lines('bAbI_relations.txt')
fact_sentences = _read_lines('bAbI_facts.txt')
labels = _read_lines('bAbI_labels.txt')

def _collect_tokens(lines, add_markers=False):
    r"""Split each line on whitespace and gather the token vocabulary.

    Args:
        lines: iterable of text lines (an open file or a list of strings).
        add_markers: when True, every token list is wrapped in the "\t"
            start-of-sequence token and the "\n" end-of-sequence token.

    Returns:
        A `(texts, vocabulary)` pair: the list of per-line token lists and
        the set of distinct tokens seen across all lines.
    """
    texts = []
    vocabulary = set()
    for line in lines:
        tokens = line.split()
        if add_markers:
            tokens = ["\t"] + tokens + ["\n"]
        texts.append(tokens)
        print(tokens)
        # set.update ignores duplicates, so no membership test is needed.
        vocabulary.update(tokens)
    return texts, vocabulary


# Vectorize the data.
# The labels are the decoder targets, so they are the only corpus that
# receives the start/end markers. The processing order (labels, questions,
# info, relations, facts) also fixes the order of the printed token lists.
target_texts, target_characters = _collect_tokens(labels, add_markers=True)
input_texts, input_characters = _collect_tokens(input_sentences)
info_texts, info_characters = _collect_tokens(info_sentences)
relat_texts, relat_characters = _collect_tokens(relation_sentences)
fact_texts, fact_characters = _collect_tokens(fact_sentences)
        
# Turn every vocabulary set into a sorted list so that a token's position
# in the list is stable from run to run.
input_characters = sorted(input_characters)
info_characters = sorted(info_characters)
target_characters = sorted(target_characters)
relat_characters = sorted(relat_characters)
fact_characters = sorted(fact_characters)

# Vocabulary sizes: encoders 1-4 read questions, info, relations and facts.
num_encoder1_tokens = len(input_characters)
num_encoder2_tokens = len(info_characters)
num_encoder3_tokens = len(relat_characters)
num_encoder4_tokens = len(fact_characters)
num_decoder_tokens = len(target_characters)

# Longest token sequence found in each corpus.
max_encoder1_seq_length = max(len(tokens) for tokens in input_texts)
max_encoder2_seq_length = max(len(tokens) for tokens in info_texts)
max_encoder3_seq_length = max(len(tokens) for tokens in relat_texts)
max_encoder4_seq_length = max(len(tokens) for tokens in fact_texts)
max_decoder_seq_length = max(len(tokens) for tokens in target_texts)

# Summary report, one "caption value" line per statistic.
for caption, value in (
        ('Number of samples:', len(input_texts)),
        ('Number of unique input tokens:', num_encoder1_tokens),
        ('Number of unique info tokens:', num_encoder2_tokens),
        ('Number of unique output tokens:', num_decoder_tokens),
        ('Max sequence length for inputs:', max_encoder1_seq_length),
        ('Max sequence length for info:', max_encoder2_seq_length),
        ('Max sequence length for relat:', max_encoder3_seq_length),
        ('Max sequence length for facts:', max_encoder4_seq_length),
        ('Max sequence length for outputs:', max_decoder_seq_length)):
    print(caption, value)


['\t', 'Mary', 'IN', 'bedroom', '\n']
['\t', 'Sandra', 'IN', 'bedroom', '\n']
['\t', 'WHERE-IS', 'Mary', '\n']
['\t', 'football', 'WAS-IN', 'bedroom', 'REPEAT', '\n']
['\t', 'football', 'WITH', 'Mary', '\n']
['\t', 'John', 'IN', 'bedroom', '\n']
['\t', 'Mary', 'IN', 'office', 'REPEAT', '\n']
['\t', 'football', 'IN', 'office', '\n']
['\t', 'John', 'IN', 'office', '\n']
['\t', 'WHERE-IS', 'John', '\n']
['\t', 'milk', 'WAS-IN', 'office', 'REPEAT', '\n']
['\t', 'milk', 'WITH', 'John', '\n']
['\t', 'Daniel', 'IN', 'kitchen', '\n']
['\t', 'John', 'IN', 'bedroom', 'REPEAT', '\n']
['\t', 'milk', 'IN', 'bedroom', '\n']
['\t', 'Daniel', 'IN', 'hallway', '\n']
['\t', 'WHERE-IS', 'Daniel', '\n']
['\t', 'apple', 'WAS-IN', 'hallway', 'REPEAT', '\n']
['\t', 'apple', 'WITH', 'Daniel', '\n']
['\t', 'WHERE-IS', 'John', '\n']
['\t', 'milk', 'IN', 'bedroom', 'REPEAT', '\n']
['\t', 'milk', 'NOT-WITH', 'John', '\n']
['\t', 'John', 'IN', 'kitchen', '\n']
['\t', 'Sandra', 'IN', 'bathroom', '\n']
['\t', 'Danie

In [9]:
# Report how many samples each corpus holds. The corpora are paired up
# sample-by-sample when the tensors are built, so the counts are expected
# to agree.
for caption, corpus in (
        ('Length of questions ', input_texts),
        ('Length of relations ', relat_texts),
        ('Length of info ', info_texts),
        ('Length of facts ', fact_texts),
        ('Length of labels ', target_texts)):
    print(caption, len(corpus))

Length of questions  43
Length of relations  43
Length of info  43
Length of facts  43
Length of labels  43


In [10]:
# Token -> integer id tables; an id is the token's position in its sorted
# vocabulary list.
input_token_index = {token: idx for idx, token in enumerate(input_characters)}
info_token_index = {token: idx for idx, token in enumerate(info_characters)}
relat_token_index = {token: idx for idx, token in enumerate(relat_characters)}
fact_token_index = {token: idx for idx, token in enumerate(fact_characters)}
target_token_index = {token: idx for idx, token in enumerate(target_characters)}

# All tensors share the same first axis: one row per question.
sample_count = len(input_texts)

# One-hot tensors of shape (samples, max sequence length, vocabulary size).
encoder1_input_data = np.zeros(
    (sample_count, max_encoder1_seq_length, num_encoder1_tokens),
    dtype='float32')
encoder2_input_data = np.zeros(
    (sample_count, max_encoder2_seq_length, num_encoder2_tokens),
    dtype='float32')
encoder3_input_data = np.zeros(
    (sample_count, max_encoder3_seq_length, num_encoder3_tokens),
    dtype='float32')
encoder4_input_data = np.zeros(
    (sample_count, max_encoder4_seq_length, num_encoder4_tokens),
    dtype='float32')

decoder_input_data = np.zeros(
    (sample_count, max_decoder_seq_length, num_decoder_tokens),
    dtype='float32')
decoder_target_data = np.zeros(
    (sample_count, max_decoder_seq_length, num_decoder_tokens),
    dtype='float32')

# zip stops at the shortest corpus, so only rows with all five parts
# present are filled in.
paired_samples = zip(
    input_texts, target_texts, info_texts, relat_texts, fact_texts)
for row, (question, label, info, relation, fact) in enumerate(paired_samples):
    encoder_sources = (
        (encoder1_input_data, question, input_token_index),
        (encoder2_input_data, info, info_token_index),
        (encoder3_input_data, relation, relat_token_index),
        (encoder4_input_data, fact, fact_token_index),
    )
    for tensor, tokens, token_index in encoder_sources:
        for step, token in enumerate(tokens):
            tensor[row, step, token_index[token]] = 1.

    label_ids = [target_token_index[token] for token in label]
    for step, token_id in enumerate(label_ids):
        decoder_input_data[row, step, token_id] = 1.
    # The target is the same sequence shifted one step to the left, so it
    # starts after the start token and is one timestep ahead of the input.
    for step, token_id in enumerate(label_ids[1:]):
        decoder_target_data[row, step, token_id] = 1.

In [11]:
# Build and train the four-encoder / one-decoder sequence-to-sequence model.
#
# One input tensor per source of context. Encoders 1-4 consume the one-hot
# question, info, relation and fact sequences respectively; the time axis is
# left as `None` so sequences of any length are accepted.
encoder1_inputs = Input(shape=(None, num_encoder1_tokens))
encoder2_inputs = Input(shape=(None, num_encoder2_tokens))
encoder3_inputs = Input(shape=(None, num_encoder3_tokens))
encoder4_inputs = Input(shape=(None, num_encoder4_tokens))

# Encoder 1 (questions). `return_state=True` makes the layer return its final
# hidden state (h) and cell state (c) in addition to its output.
encoder1 = LSTM(latent_dim, return_state=True)
encoder_outputs1, state_h1, state_c1 = encoder1(encoder1_inputs)
# We discard `encoder_outputs1` and only keep the states.
encoder_states1 = [state_h1, state_c1]

# Encoder 2 (info sentences).
encoder2 = LSTM(latent_dim, return_state=True)
encoder_outputs2, state_h2, state_c2 = encoder2(encoder2_inputs)
# We discard `encoder_outputs2` and only keep the states.
encoder_states2 = [state_h2, state_c2]

# Encoder 3 (relations).
encoder3 = LSTM(latent_dim, return_state=True)
encoder_outputs3, state_h3, state_c3 = encoder3(encoder3_inputs)
# We discard `encoder_outputs3` and only keep the states.
encoder_states3 = [state_h3, state_c3]

# Encoder 4 (facts).
encoder4 = LSTM(latent_dim, return_state=True)
encoder_outputs4, state_h4, state_c4 = encoder4(encoder4_inputs)
# We discard `encoder_outputs4` and only keep the states.
encoder_states4 = [state_h4, state_c4]

# Merge the four encoders by concatenating their hidden states (and,
# separately, their cell states) along the feature axis. Each merged state is
# therefore 4 * latent_dim wide. The per-encoder `encoder_statesN` lists above
# are not used by the code visible here; only the merged pair is.
added_h = keras.layers.Concatenate(axis=1)([state_h1, state_h2, state_h3, state_h4])
added_c = keras.layers.Concatenate(axis=1)([state_c1, state_c2, state_c3, state_c4])
encoder_states = [added_h,added_c]

# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = Input(shape=(None, num_decoder_tokens))
# We set up our decoder to return full output sequences,
# and to return internal states as well. We don't use the
# return states in the training model, but we will use them in inference.
# The decoder has latent_dim*4 units so that its state size matches the
# concatenated encoder states it is initialised with.
decoder_lstm = LSTM(latent_dim*4, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                     initial_state=encoder_states)
# Per-timestep softmax over the target vocabulary.
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Define the model that will turn the four `encoderN_input_data` tensors
# & `decoder_input_data` into `decoder_target_data`
model = Model([encoder1_inputs, encoder2_inputs, encoder3_inputs, encoder4_inputs, decoder_inputs], decoder_outputs)

# Run training
# `validation_split=0.2` holds out 20% of the samples for validation, so
# those rows are never used for weight updates.
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.fit([encoder1_input_data, encoder2_input_data, encoder3_input_data, encoder4_input_data, decoder_input_data], decoder_target_data,
          batch_size=batch_size,
          epochs=epochs,
          validation_split=0.2)
# Save model
model.save('s2s.h5')

Train on 34 samples, validate on 9 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoc

  str(node.arguments) + '. They will not be included '


In [12]:
# Next: inference mode (sampling).
# Here's the drill:
# 1) encode input and retrieve initial decoder state
# 2) run one step of decoder with this initial state
# and a "start of sequence" token as target.
# Output will be the next target token
# 3) Repeat with the current target token and current states

# Build the two standalone models used for sampling.

# Encoder model: the four encoder inputs in, the merged [h, c] states out.
encoder_model = Model(
    [encoder1_inputs, encoder2_inputs, encoder3_inputs, encoder4_inputs],
    encoder_states)

# Decoder model: the LSTM states become explicit inputs and outputs so the
# decoder can be advanced one step at a time. The state width is
# latent_dim*4, the same as the number of units in `decoder_lstm`.
decoder_state_shape = (latent_dim*4,)
decoder_state_input_h = Input(shape=decoder_state_shape)
decoder_state_input_c = Input(shape=decoder_state_shape)
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

# Reuse the trained decoder layers; only the source of the initial state
# differs from the training graph.
decoder_outputs, state_h, state_c = decoder_lstm(
    decoder_inputs, initial_state=decoder_states_inputs)
decoder_outputs = decoder_dense(decoder_outputs)
decoder_states = [state_h, state_c]

decoder_model = Model(
    [decoder_inputs, decoder_state_input_h, decoder_state_input_c],
    [decoder_outputs, state_h, state_c])

In [13]:
# Invert every token -> id table into an id -> token table, so that
# predicted ids can be turned back into readable tokens.
reverse_input_char_index = {
    idx: token for token, idx in input_token_index.items()}
reverse_info_char_index = {
    idx: token for token, idx in info_token_index.items()}
reverse_relat_char_index = {
    idx: token for token, idx in relat_token_index.items()}
reverse_fact_char_index = {
    idx: token for token, idx in fact_token_index.items()}
reverse_target_char_index = {
    idx: token for token, idx in target_token_index.items()}

In [14]:
def decode_sequence(input1_seq, input2_seq, input3_seq, input4_seq):
    """Greedily decode one target sequence from the four encoder inputs.

    The four inputs are passed to `encoder_model` to obtain the initial
    decoder state. The decoder is then run one step at a time, starting
    from the tab start token and feeding back the arg-max token, until the
    newline stop token is produced or more than `max_decoder_seq_length`
    tokens have been emitted.

    Args:
        input1_seq, input2_seq, input3_seq, input4_seq: encoder input
            arrays, in the order expected by `encoder_model`. The loop
            reads only the first batch row of each prediction, so a batch
            of size 1 is assumed.

    Returns:
        The decoded tokens as one string, each token followed by a single
        space (the stop token included, when it was produced).
    """
    # Encode the inputs into the decoder's initial [h, c] state.
    states_value = encoder_model.predict(
        [input1_seq, input2_seq, input3_seq, input4_seq])

    # The first decoder input is the start-of-sequence token.
    next_token_id = target_token_index['\t']
    pieces = []
    while True:
        # One-hot encode the current token as a length-1 sequence.
        target_seq = np.zeros((1, 1, num_decoder_tokens))
        target_seq[0, 0, next_token_id] = 1.

        output_tokens, h, c = decoder_model.predict(
            [target_seq] + states_value)

        # Greedy sampling: take the most probable token of the last step.
        next_token_id = np.argmax(output_tokens[0, -1, :])
        token = reverse_target_char_index[next_token_id]
        pieces.append(token + " ")

        # Stop on the end token or once the length limit is exceeded.
        if token == '\n' or len(pieces) > max_decoder_seq_length:
            break

        # Carry the decoder state over to the next step.
        states_value = [h, c]

    return ''.join(pieces)


def _spaced(tokens):
    """Join `tokens` into one string, each token followed by a space."""
    return ''.join(token + ' ' for token in tokens)


# Decode up to 42 samples, as before, but never index past the samples that
# were actually loaded (a plain `range(42)` fails on a smaller data set).
num_to_decode = min(42, len(input_texts))

for seq_index in range(num_to_decode):
    # Take one sample from the arrays that were passed to `fit` (this
    # covers both the rows trained on and the rows held out for
    # validation) and try decoding it. Slicing rather than indexing keeps
    # the leading batch axis, giving a batch of size 1.
    input1_seq = encoder1_input_data[seq_index: seq_index + 1]
    input2_seq = encoder2_input_data[seq_index: seq_index + 1]
    input3_seq = encoder3_input_data[seq_index: seq_index + 1]
    input4_seq = encoder4_input_data[seq_index: seq_index + 1]
    decoded_sentence = decode_sequence(input1_seq, input2_seq, input3_seq, input4_seq)

    # Rebuild the readable form of each input for the report.
    input_sentence = _spaced(input_texts[seq_index])
    info_sentence = _spaced(info_texts[seq_index])
    relat_sentence = _spaced(relat_texts[seq_index])
    fact_sentence = _spaced(fact_texts[seq_index])

    print('-')
    print('Info sentence:', info_sentence)
    print('Question sentence:', input_sentence)
    print('Relat sentence:', relat_sentence)
    print('Fact sentence:', fact_sentence)
    print('Decoded sentence:', decoded_sentence)

-
Info sentence: Mary went to the bedroom 
Question sentence: where was the football before the bathroom ? 
Relat sentence: NONE 
Fact sentence: NONE 
Decoded sentence: Mary IN bedroom 
 
-
Info sentence: Sandra went to the bedroom 
Question sentence: where was the football before the bathroom ? 
Relat sentence: NONE 
Fact sentence: NONE 
Decoded sentence: Sandra IN bedroom 
 
-
Info sentence: Mary got the football there 
Question sentence: where was the football before the bathroom ? 
Relat sentence: NONE 
Fact sentence: NONE 
Decoded sentence: WHERE-IS Mary 
 
-
Info sentence: Mary got the football there 
Question sentence: where was the football before the bathroom ? 
Relat sentence: Mary IN bedroom 
Fact sentence: NONE 
Decoded sentence: football WAS-IN bedroom REPEAT 
 
-
Info sentence: Mary got the football there 
Question sentence: where was the football before the bathroom ? 
Relat sentence: football WAS-IN bedroom REPEAT 
Fact sentence: NONE 
Decoded sentence: football WITH Ma