In [1]:
import numpy as np
from keras.models import Model
from keras.layers import Input, LSTM, Dense
from keras.utils import plot_model
from keras.callbacks import TensorBoard
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping

Using TensorFlow backend.


In [2]:
batch_size = 64
epochs = 100
latent_dimension = 512
num_samples = 100000
data_path = 'fra-eng/fra.txt'

In [3]:
input_texts = []
target_texts = []

In [4]:
input_characters = set()
target_characters = set()

In [5]:
with open(data_path, 'r', encoding='utf-8') as f:
    lines = f.read().split('\n')

In [6]:
len(lines)

160873

In [7]:
for line in lines[: min(num_samples, len(lines) - 1)]:
    
#     input_text, target_text = line.split('\t')
    target_text, input_text = line.lower().split('\t')
    target_text = '\t' + target_text + '\n'
    
    input_texts.append(input_text)
    target_texts.append(target_text)
    
    for char in input_text:
        if char not in input_characters:
            input_characters.add(char)
            
    for char in target_text:
        if char not in target_characters:
            target_characters.add(char)

In [8]:
len(input_texts)

100000

In [9]:
input_texts

['va !',
 'salut !',
 'cours\u202f!',
 'courez\u202f!',
 'ça alors\u202f!',
 'au feu !',
 "à l'aide\u202f!",
 'saute.',
 'ça suffit\u202f!',
 'stop\u202f!',
 'arrête-toi !',
 'attends !',
 'attendez !',
 'poursuis.',
 'continuez.',
 'poursuivez.',
 'bonjour !',
 'salut !',
 'je comprends.',
 "j'essaye.",
 "j'ai gagné !",
 "je l'ai emporté !",
 'oh non !',
 'attaque !',
 'attaquez !',
 'santé !',
 'à votre santé !',
 'merci !',
 'tchin-tchin !',
 'lève-toi.',
 'va, maintenant.',
 'allez-y maintenant.',
 'vas-y maintenant.',
 "j'ai pigé !",
 'compris !',
 'pigé\u202f?',
 'compris\u202f?',
 "t'as capté\u202f?",
 'monte.',
 'montez.',
 'serre-moi dans tes bras !',
 'serrez-moi dans vos bras !',
 'je suis tombée.',
 'je suis tombé.',
 'je sais.',
 'je suis parti.',
 'je suis partie.',
 "j'ai perdu.",
 "j'ai 19 ans.",
 'je vais bien.',
 'ça va.',
 'écoutez !',
 "c'est pas possible\u202f!",
 'impossible\u202f!',
 'en aucun cas.',
 'sans façons\u202f!',
 "c'est hors de question !",
 "il n'en e

In [10]:
len(target_texts)

100000

In [11]:
target_texts

['\tgo.\n',
 '\thi.\n',
 '\trun!\n',
 '\trun!\n',
 '\twow!\n',
 '\tfire!\n',
 '\thelp!\n',
 '\tjump.\n',
 '\tstop!\n',
 '\tstop!\n',
 '\tstop!\n',
 '\twait!\n',
 '\twait!\n',
 '\tgo on.\n',
 '\tgo on.\n',
 '\tgo on.\n',
 '\thello!\n',
 '\thello!\n',
 '\ti see.\n',
 '\ti try.\n',
 '\ti won!\n',
 '\ti won!\n',
 '\toh no!\n',
 '\tattack!\n',
 '\tattack!\n',
 '\tcheers!\n',
 '\tcheers!\n',
 '\tcheers!\n',
 '\tcheers!\n',
 '\tget up.\n',
 '\tgo now.\n',
 '\tgo now.\n',
 '\tgo now.\n',
 '\tgot it!\n',
 '\tgot it!\n',
 '\tgot it?\n',
 '\tgot it?\n',
 '\tgot it?\n',
 '\thop in.\n',
 '\thop in.\n',
 '\thug me.\n',
 '\thug me.\n',
 '\ti fell.\n',
 '\ti fell.\n',
 '\ti know.\n',
 '\ti left.\n',
 '\ti left.\n',
 '\ti lost.\n',
 "\ti'm 19.\n",
 "\ti'm ok.\n",
 "\ti'm ok.\n",
 '\tlisten.\n',
 '\tno way!\n',
 '\tno way!\n',
 '\tno way!\n',
 '\tno way!\n',
 '\tno way!\n',
 '\tno way!\n',
 '\tno way!\n',
 '\tno way!\n',
 '\tno way!\n',
 '\treally?\n',
 '\treally?\n',
 '\treally?\n',
 '\tthanks.\n',
 '\

In [12]:
len(input_characters)

74

In [13]:
input_characters = sorted(list(input_characters))

In [14]:
input_characters

[' ',
 '!',
 '"',
 '$',
 '%',
 '&',
 "'",
 '(',
 ')',
 '+',
 ',',
 '-',
 '.',
 '0',
 '1',
 '2',
 '3',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9',
 ':',
 '?',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z',
 '\xa0',
 '«',
 '»',
 'à',
 'â',
 'ç',
 'è',
 'é',
 'ê',
 'ë',
 'î',
 'ï',
 'ô',
 'ù',
 'û',
 'œ',
 'с',
 '\u2009',
 '\u200b',
 '‘',
 '’',
 '\u202f',
 '‽']

In [15]:
len(target_characters)

57

In [16]:
target_characters = sorted(list(target_characters))

In [17]:
target_characters

['\t',
 '\n',
 ' ',
 '!',
 '"',
 '$',
 '%',
 '&',
 "'",
 ',',
 '-',
 '.',
 '/',
 '0',
 '1',
 '2',
 '3',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9',
 ':',
 '?',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z',
 '\xa0',
 'ç',
 'é',
 '‘',
 '’',
 '€']

In [18]:
num_encoder_tokens = len(input_characters)

In [19]:
num_decoder_tokens = len(target_characters)

In [20]:
max_encoder_seq_length = max([len(txt) for txt in input_texts])

In [21]:
max_decoder_seq_length = max([len(txt) for txt in target_texts])

In [22]:
print('Max sequence length for inputs:', max_encoder_seq_length)

Max sequence length for inputs: 76


In [23]:
print('Max sequence length for outputs:', max_decoder_seq_length)

Max sequence length for outputs: 34


In [24]:
input_token_index = dict([(char, i) for i, char in enumerate(input_characters)])

In [25]:
input_token_index

{' ': 0,
 '!': 1,
 '"': 2,
 '$': 3,
 '%': 4,
 '&': 5,
 "'": 6,
 '(': 7,
 ')': 8,
 '+': 9,
 ',': 10,
 '-': 11,
 '.': 12,
 '0': 13,
 '1': 14,
 '2': 15,
 '3': 16,
 '4': 17,
 '5': 18,
 '6': 19,
 '7': 20,
 '8': 21,
 '9': 22,
 ':': 23,
 '?': 24,
 'a': 25,
 'b': 26,
 'c': 27,
 'd': 28,
 'e': 29,
 'f': 30,
 'g': 31,
 'h': 32,
 'i': 33,
 'j': 34,
 'k': 35,
 'l': 36,
 'm': 37,
 'n': 38,
 'o': 39,
 'p': 40,
 'q': 41,
 'r': 42,
 's': 43,
 't': 44,
 'u': 45,
 'v': 46,
 'w': 47,
 'x': 48,
 'y': 49,
 'z': 50,
 '\xa0': 51,
 '«': 52,
 '»': 53,
 'à': 54,
 'â': 55,
 'ç': 56,
 'è': 57,
 'é': 58,
 'ê': 59,
 'ë': 60,
 'î': 61,
 'ï': 62,
 'ô': 63,
 'ù': 64,
 'û': 65,
 'œ': 66,
 'с': 67,
 '\u2009': 68,
 '\u200b': 69,
 '‘': 70,
 '’': 71,
 '\u202f': 72,
 '‽': 73}

In [26]:
target_token_index = dict([(char, i) for i, char in enumerate(target_characters)])

In [27]:
target_token_index

{'\t': 0,
 '\n': 1,
 ' ': 2,
 '!': 3,
 '"': 4,
 '$': 5,
 '%': 6,
 '&': 7,
 "'": 8,
 ',': 9,
 '-': 10,
 '.': 11,
 '/': 12,
 '0': 13,
 '1': 14,
 '2': 15,
 '3': 16,
 '4': 17,
 '5': 18,
 '6': 19,
 '7': 20,
 '8': 21,
 '9': 22,
 ':': 23,
 '?': 24,
 'a': 25,
 'b': 26,
 'c': 27,
 'd': 28,
 'e': 29,
 'f': 30,
 'g': 31,
 'h': 32,
 'i': 33,
 'j': 34,
 'k': 35,
 'l': 36,
 'm': 37,
 'n': 38,
 'o': 39,
 'p': 40,
 'q': 41,
 'r': 42,
 's': 43,
 't': 44,
 'u': 45,
 'v': 46,
 'w': 47,
 'x': 48,
 'y': 49,
 'z': 50,
 '\xa0': 51,
 'ç': 52,
 'é': 53,
 '‘': 54,
 '’': 55,
 '€': 56}

In [28]:
encoder_input_data = np.zeros(
    (len(input_texts), max_encoder_seq_length, num_encoder_tokens),
    dtype='float32')

In [29]:
encoder_input_data.shape

(100000, 76, 74)

In [30]:
encoder_input_data

array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0.

In [31]:
decoder_input_data = np.zeros(
    (len(input_texts), max_decoder_seq_length, num_decoder_tokens),
    dtype='float32')

In [32]:
decoder_input_data.shape

(100000, 34, 57)

In [33]:
decoder_target_data = np.zeros(
    (len(input_texts), max_decoder_seq_length, num_decoder_tokens),
    dtype='float32')

In [34]:
decoder_target_data.shape

(100000, 34, 57)

In [35]:
for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
    
    for t, char in enumerate(input_text):
        encoder_input_data[i, t, input_token_index[char]] = 1.
    for t, char in enumerate(target_text):
        decoder_input_data[i, t, target_token_index[char]] = 1.
        if t > 0:
            decoder_target_data[i, t - 1, target_token_index[char]] = 1.
            
# print("Sample values of i, input_text, target_text : ",i,input_text, target_text)
# print("Sample values of t, char, input_token_index[char] : ", t,char,input_token_index[char] )

In [36]:
print(encoder_input_data)

[[[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 ...

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [1. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]]


In [37]:
print(decoder_input_data)

[[[1. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[1. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[1. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 ...

 [[1. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 1. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 1. 0. ... 0. 0. 0.]]

 [[1. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 1. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 1. 0. ... 0. 0. 0.]]

 [[1. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 1. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 1. 0. ... 0. 0. 0.]]]


In [38]:
print(decoder_target_data)

[[[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 ...

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 1. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 1. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 1. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 1. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 1. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 1. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]]


In [39]:
encoder_inputs = Input(shape=(None, num_encoder_tokens))

In [40]:
encoder = LSTM(latent_dimension, return_state=True)

In [41]:
encoder_outputs, state_h, state_c = encoder(encoder_inputs)

Instructions for updating:
Colocations handled automatically by placer.


In [42]:
encoder_states = [state_h, state_c]

In [43]:
decoder_inputs = Input(shape=(None, num_decoder_tokens))

In [44]:
decoder_lstm = LSTM(latent_dimension, return_sequences=True, return_state=True)

In [45]:
decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                     initial_state=encoder_states)

In [46]:
decoder_dense = Dense(num_decoder_tokens, activation='softmax')

In [47]:
decoder_outputs = decoder_dense(decoder_outputs)

In [48]:
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

In [49]:
plot_model(model, to_file='model.png', show_shapes=True)

In [50]:
%%html
<img src="model.png">

In [51]:
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

In [52]:
filepath="saved_models/weights.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
earlyStopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)

In [53]:
model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
          batch_size=batch_size,
          epochs=epochs,
          validation_split=0.2,
          callbacks=[TensorBoard(log_dir='tmp/autoencoder'), checkpoint, earlyStopping])

Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Train on 80000 samples, validate on 20000 samples
Epoch 1/100

Epoch 00001: loss improved from inf to 1.18485, saving model to saved_models/weights.best.hdf5


  '. They will not be included '


Epoch 2/100

Epoch 00002: loss improved from 1.18485 to 0.77372, saving model to saved_models/weights.best.hdf5
Epoch 3/100

Epoch 00003: loss improved from 0.77372 to 0.64150, saving model to saved_models/weights.best.hdf5
Epoch 4/100

Epoch 00004: loss improved from 0.64150 to 0.57155, saving model to saved_models/weights.best.hdf5
Epoch 5/100

Epoch 00005: loss improved from 0.57155 to 0.52344, saving model to saved_models/weights.best.hdf5
Epoch 6/100

Epoch 00006: loss improved from 0.52344 to 0.48395, saving model to saved_models/weights.best.hdf5
Epoch 7/100

Epoch 00007: loss improved from 0.48395 to 0.45249, saving model to saved_models/weights.best.hdf5
Epoch 8/100

Epoch 00008: loss improved from 0.45249 to 0.42661, saving model to saved_models/weights.best.hdf5
Epoch 9/100

Epoch 00009: loss improved from 0.42661 to 0.40501, saving model to saved_models/weights.best.hdf5
Epoch 10/100

Epoch 00010: loss improved from 0.40501 to 0.38669, saving model to saved_models/weights.b

<keras.callbacks.History at 0x44dd4da0>

In [54]:
encoder_model = Model(encoder_inputs, encoder_states)

In [55]:
plot_model(encoder_model, to_file='encoder_model.png', show_shapes=True)

In [56]:
%%html
<img src="encoder_model.png">

In [57]:
decoder_state_input_h = Input(shape=(latent_dimension,))

In [58]:
decoder_state_input_c = Input(shape=(latent_dimension,))

In [59]:
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

In [60]:
decoder_outputs, state_h, state_c = decoder_lstm(
    decoder_inputs, initial_state=decoder_states_inputs)

In [61]:
decoder_states = [state_h, state_c]

In [62]:
decoder_outputs = decoder_dense(decoder_outputs)

In [63]:
decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs,
    [decoder_outputs] + decoder_states)

In [64]:
plot_model(decoder_model, to_file='decoder_model.png', show_shapes=True)

In [65]:
%%html
<img src="decoder_model.png">

In [66]:
reverse_input_char_index = dict(
    (i, char) for char, i in input_token_index.items())

In [67]:
reverse_input_char_index

{0: ' ',
 1: '!',
 2: '"',
 3: '$',
 4: '%',
 5: '&',
 6: "'",
 7: '(',
 8: ')',
 9: '+',
 10: ',',
 11: '-',
 12: '.',
 13: '0',
 14: '1',
 15: '2',
 16: '3',
 17: '4',
 18: '5',
 19: '6',
 20: '7',
 21: '8',
 22: '9',
 23: ':',
 24: '?',
 25: 'a',
 26: 'b',
 27: 'c',
 28: 'd',
 29: 'e',
 30: 'f',
 31: 'g',
 32: 'h',
 33: 'i',
 34: 'j',
 35: 'k',
 36: 'l',
 37: 'm',
 38: 'n',
 39: 'o',
 40: 'p',
 41: 'q',
 42: 'r',
 43: 's',
 44: 't',
 45: 'u',
 46: 'v',
 47: 'w',
 48: 'x',
 49: 'y',
 50: 'z',
 51: '\xa0',
 52: '«',
 53: '»',
 54: 'à',
 55: 'â',
 56: 'ç',
 57: 'è',
 58: 'é',
 59: 'ê',
 60: 'ë',
 61: 'î',
 62: 'ï',
 63: 'ô',
 64: 'ù',
 65: 'û',
 66: 'œ',
 67: 'с',
 68: '\u2009',
 69: '\u200b',
 70: '‘',
 71: '’',
 72: '\u202f',
 73: '‽'}

In [68]:
reverse_target_char_index = dict(
    (i, char) for char, i in target_token_index.items())

In [69]:
reverse_target_char_index

{0: '\t',
 1: '\n',
 2: ' ',
 3: '!',
 4: '"',
 5: '$',
 6: '%',
 7: '&',
 8: "'",
 9: ',',
 10: '-',
 11: '.',
 12: '/',
 13: '0',
 14: '1',
 15: '2',
 16: '3',
 17: '4',
 18: '5',
 19: '6',
 20: '7',
 21: '8',
 22: '9',
 23: ':',
 24: '?',
 25: 'a',
 26: 'b',
 27: 'c',
 28: 'd',
 29: 'e',
 30: 'f',
 31: 'g',
 32: 'h',
 33: 'i',
 34: 'j',
 35: 'k',
 36: 'l',
 37: 'm',
 38: 'n',
 39: 'o',
 40: 'p',
 41: 'q',
 42: 'r',
 43: 's',
 44: 't',
 45: 'u',
 46: 'v',
 47: 'w',
 48: 'x',
 49: 'y',
 50: 'z',
 51: '\xa0',
 52: 'ç',
 53: 'é',
 54: '‘',
 55: '’',
 56: '€'}

In [70]:
def decode_sequence(input_seq):

    states_value = encoder_model.predict(input_seq)

    target_seq = np.zeros((1, 1, num_decoder_tokens))

    target_seq[0, 0, target_token_index['\t']] = 1

    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict(
            [target_seq] + states_value)
        
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = reverse_target_char_index[sampled_token_index]
        decoded_sentence += sampled_char


        if (sampled_char == '\n' or
           len(decoded_sentence) > max_decoder_seq_length):
            stop_condition = True


        target_seq = np.zeros((1, 1, num_decoder_tokens))
        target_seq[0, 0, sampled_token_index] = 1.

        states_value = [h, c]

    return decoded_sentence

In [74]:
for seq_index in range(99900, 100000):
    input_seq = encoder_input_data[seq_index: seq_index + 1]
    
    decoded_sentence = decode_sequence(input_seq)
    
    print('-')
    print('Input sentence:', input_texts[seq_index])
    print('Decoded sentence:', decoded_sentence)

-
Input sentence: j'aime écrire des chansons en français.
Decoded sentence: i like the way you think.

-
Input sentence: je préfère les trains aux bus.
Decoded sentence: i prefer to wake up at six.

-
Input sentence: j'aime travailler pour cette entreprise.
Decoded sentence: i like the way you think.

-
Input sentence: je t'aime juste comme tu es.
Decoded sentence: i promise i'll be back.

-
Input sentence: j'ai apprécié de vous avoir ici ce soir.
Decoded sentence: i liked your friends wime.

-
Input sentence: j'ai apprécié de t'avoir ici ce soir.
Decoded sentence: i liked your friends wime.

-
Input sentence: j'écoutai, mais je n'entendis rien.
Decoded sentence: i got a beer with someone.

-
Input sentence: je vis à boston avec ma famille.
Decoded sentence: i live in a right after.

-
Input sentence: l'année dernière je vivais à sanda city.
Decoded sentence: the boy caught a large fish.

-
Input sentence: je me suis enfermé à l'extérieur de ma maison.
Decoded sentence: i got up early 

-
Input sentence: je n'ai que trois dollars sur moi.
Decoded sentence: i have no idea why it is so.

-
Input sentence: j'espère seulement que vous n'êtes pas trop en retard.
Decoded sentence: i hope i'm not too late for.

-
Input sentence: j'espère seulement que tu n'es pas trop en retard.
Decoded sentence: i hope i'm not too late for.

-
Input sentence: je souhaiterais seulement être en mesure de vous aider.
Decoded sentence: i love you a lot of money.

-
Input sentence: je souhaiterais seulement être en mesure de t'aider.
Decoded sentence: i love you a lot of money.

-
Input sentence: j'ai commandé ce livre depuis l'angleterre.
Decoded sentence: i bought a new car.

-
Input sentence: j'ai commandé le livre en angleterre.
Decoded sentence: i bought a new car.

-
Input sentence: j'ai commandé le livre depuis l'angleterre.
Decoded sentence: i bought a new car.

-
Input sentence: j'ai payé 2 000 yens pour cet atlas.
Decoded sentence: i told tom to come home.

-
Input sentence: j'ai payé 

In [72]:
import pickle
encoder_modelFile = open('encoder_model.pickle', 'wb')  
pickle.dump(encoder_model, encoder_modelFile)                      
encoder_modelFile.close() 

In [73]:
decoder_modelFile = open('decoder_model.pickle', 'wb')  
pickle.dump(decoder_model, decoder_modelFile)                      
decoder_modelFile.close() 

  '. They will not be included '
