In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, LSTM, Dense
from keras.src.utils import plot_model

In [2]:
data_path = [
    ("Go.", "Va !"),
    ("Run!", "Cours !"),
    ("Run.", "Cours !"),
    ("Who?", "Qui ?"),
    ("Wow!", "Ça alors !"),
    ("Fire!", "Au feu !"),
    ("Help!", "À l'aide !"),
    ("Stop!", "Arrête-toi !"),
    ("Wait!", "Attends !"),
    ("Hello!", "Bonjour !"),
    ("I see.", "Je comprends.")
]

### Data Preprocessing

In [3]:
input_texts = []
target_texts = []
input_characters = set()
target_characters = set()

for input, target in data_path:
    target = '\t' + target + '\n'
    input_texts.append(input)
    target_texts.append(target)

    for char in input:
        if char not in input_characters:
            input_characters.add(char)

    for char in target:
        if char not in target_characters:
            target_characters.add(char)

In [4]:
data = {'input_texts': input_texts, 'target_texts': target_texts, 'input_characters': input_characters,
        'target_characters': target_characters}

In [5]:
print(data)

{'input_texts': ['Go.', 'Run!', 'Run.', 'Who?', 'Wow!', 'Fire!', 'Help!', 'Stop!', 'Wait!', 'Hello!', 'I see.'], 'target_texts': ['\tVa !\n', '\tCours !\n', '\tCours !\n', '\tQui ?\n', '\tÇa alors !\n', '\tAu feu !\n', "\tÀ l'aide !\n", '\tArrête-toi !\n', '\tAttends !\n', '\tBonjour !\n', '\tJe comprends.\n'], 'input_characters': {'w', 'l', 'n', '!', 'p', 'W', 'o', 'h', 'R', 'S', 'H', ' ', 't', 'a', 'i', 'u', 'r', '?', 'F', 'I', '.', 'G', 'e', 's'}, 'target_characters': {'d', 'B', 'l', 'n', 'Ç', '!', 'p', 'Q', 'C', 'o', 'A', 'ê', 'm', 'f', 'j', '\t', 'J', 'V', ' ', '-', "'", 't', '\n', 'i', 'c', 'u', 'r', 'À', '?', '.', 'a', 'e', 's'}}


In [6]:
input_characters = sorted(list(input_characters))
target_characters = sorted(list(target_characters))

In [7]:
num_enc_tokens = len(input_characters)
num_dec_tokens = len(target_characters)

In [8]:
num_enc_tokens, num_dec_tokens

(24, 33)

In [9]:
max_enc_length = max([len(seq) for seq in input_texts])

In [10]:
max_dec_length = max([len(seq) for seq in target_texts])

In [11]:
max_enc_length, max_dec_length

(6, 15)

### Create token mappings (char -> int)

In [12]:
input_token_index = {char: i for i, char in enumerate(input_characters)}
target_token_index = {char: i for i, char in enumerate(target_characters)}

input_token_index, target_token_index

({' ': 0,
  '!': 1,
  '.': 2,
  '?': 3,
  'F': 4,
  'G': 5,
  'H': 6,
  'I': 7,
  'R': 8,
  'S': 9,
  'W': 10,
  'a': 11,
  'e': 12,
  'h': 13,
  'i': 14,
  'l': 15,
  'n': 16,
  'o': 17,
  'p': 18,
  'r': 19,
  's': 20,
  't': 21,
  'u': 22,
  'w': 23},
 {'\t': 0,
  '\n': 1,
  ' ': 2,
  '!': 3,
  "'": 4,
  '-': 5,
  '.': 6,
  '?': 7,
  'A': 8,
  'B': 9,
  'C': 10,
  'J': 11,
  'Q': 12,
  'V': 13,
  'a': 14,
  'c': 15,
  'd': 16,
  'e': 17,
  'f': 18,
  'i': 19,
  'j': 20,
  'l': 21,
  'm': 22,
  'n': 23,
  'o': 24,
  'p': 25,
  'r': 26,
  's': 27,
  't': 28,
  'u': 29,
  'À': 30,
  'Ç': 31,
  'ê': 32})

### Create inverse token mappings (int -> char)


In [13]:
reverse_input_token_index = {i: char for i, char in enumerate(input_characters)}
reverse_target_token_index = {i: char for i, char in enumerate(target_characters)}

In [14]:
reverse_input_token_index, reverse_target_token_index

({0: ' ',
  1: '!',
  2: '.',
  3: '?',
  4: 'F',
  5: 'G',
  6: 'H',
  7: 'I',
  8: 'R',
  9: 'S',
  10: 'W',
  11: 'a',
  12: 'e',
  13: 'h',
  14: 'i',
  15: 'l',
  16: 'n',
  17: 'o',
  18: 'p',
  19: 'r',
  20: 's',
  21: 't',
  22: 'u',
  23: 'w'},
 {0: '\t',
  1: '\n',
  2: ' ',
  3: '!',
  4: "'",
  5: '-',
  6: '.',
  7: '?',
  8: 'A',
  9: 'B',
  10: 'C',
  11: 'J',
  12: 'Q',
  13: 'V',
  14: 'a',
  15: 'c',
  16: 'd',
  17: 'e',
  18: 'f',
  19: 'i',
  20: 'j',
  21: 'l',
  22: 'm',
  23: 'n',
  24: 'o',
  25: 'p',
  26: 'r',
  27: 's',
  28: 't',
  29: 'u',
  30: 'À',
  31: 'Ç',
  32: 'ê'})

### Generate one-hot encoded data

#### Encoder Input -> (num_samples, max_len, unique_chars)


In [15]:
enc_data_input = np.zeros((len(input_texts), max_enc_length, num_enc_tokens), dtype=np.int32)

In [16]:
enc_data_input.shape

(11, 6, 24)

#### Decoder Input -> (num_samples, max_len, unique_chars)

In [17]:
dec_data_input = np.zeros((len(input_texts), max_dec_length, num_dec_tokens))
dec_data_input.shape

(11, 15, 33)

#### Decoder output

In [18]:
dec_data_target = np.zeros((len(input_texts), max_dec_length, num_dec_tokens))
dec_data_target.shape

(11, 15, 33)

In [19]:
for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
    for t, char in enumerate(input_text):
        enc_data_input[i, t, input_token_index[char]] = 1
    enc_data_input[i, t + 1:, input_token_index[' ']] = 1  # padding

    for t, char in enumerate(target_text):
        dec_data_input[i, t, target_token_index[char]] = 1
        if t > 0:
            dec_data_input[i, t - 1:, target_token_index[char]] = 1
    dec_data_input[i, t + 1:, target_token_index[' ']] = 1  # padding
    dec_data_target[i, t:, target_token_index[' ']] = 1  # padding


In [20]:
enc_data_input

array([[[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 1, ..., 0, 0, 0],
        [1, 0, 0, ..., 0, 0, 0],
        [1, 0, 0, ..., 0, 0, 0],
        [1, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 1, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 1, 0, ..., 0, 0, 0],
        [1, 0, 0, ..., 0, 0, 0],
        [1, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 1, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 1, ..., 0, 0, 0],
        [1, 0, 0, ..., 0, 0, 0],
        [1, 0, 0, ..., 0, 0, 0]],

       ...,

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 1, 0, 0],
        [0, 1, 0, ..., 0, 0, 0],
        [1, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 1, 0, ...,

In [21]:
dec_data_input

array([[[1., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.],
        ...,
        [0., 1., 1., ..., 0., 0., 0.],
        [0., 1., 1., ..., 0., 0., 0.],
        [0., 1., 1., ..., 0., 0., 0.]],

       [[1., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 1., 1., ..., 0., 0., 0.],
        [0., 1., 1., ..., 0., 0., 0.],
        [0., 1., 1., ..., 0., 0., 0.]],

       [[1., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 1., 1., ..., 0., 0., 0.],
        [0., 1., 1., ..., 0., 0., 0.],
        [0., 1., 1., ..., 0., 0., 0.]],

       ...,

       [[1., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 1., 1., ..., 0., 0., 0.],
        [0., 1., 1., ..., 0., 0., 0.],
        [0., 1., 1., ..., 0., 0.

In [22]:
dec_data_target

array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 1., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 1., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 1., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 1., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0.

### Create the encoder

In [23]:
enc_inputs = Input(shape=(None, num_enc_tokens))
encoder = LSTM(256, return_state=True)
enc_outputs, state_h, state_c = encoder(enc_inputs)

### Create the decoder

In [24]:
dec_inputs = Input(shape=(None, num_dec_tokens))
decoder_lstm = LSTM(256, return_state=True, return_sequences=True)
dec_outputs, *_ = decoder_lstm(dec_inputs, initial_state=[state_h, state_c])

decoder_dense = Dense(num_dec_tokens, activation='softmax')
dec_outputs = decoder_dense(dec_outputs)

In [25]:
model = Model(inputs=[enc_inputs, dec_inputs], outputs=dec_outputs)

In [26]:
model.summary()

In [32]:
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

In [33]:
history = model.fit([enc_data_input, dec_data_input], dec_data_target, epochs=100)

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.3636 - loss: 0.0770
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119ms/step - accuracy: 0.3636 - loss: 0.0631
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step - accuracy: 0.3636 - loss: 0.1942
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step - accuracy: 0.3636 - loss: 0.1355
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 134ms/step - accuracy: 0.3636 - loss: 0.1088
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 132ms/step - accuracy: 0.3636 - loss: 0.0983
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step - accuracy: 0.3636 - loss: 0.1436
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 128ms/step - accuracy: 0.3636 - loss: 0.0935
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[