In [1]:
# %aimport helper, tests
# %autoreload 1

In [None]:
import collections

import helper
import numpy as np
import project_tests as tests

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import (
    GRU,
    Input,
    Dense,
    TimeDistributed,
    Activation,
    RepeatVector,
    Bidirectional,
    Embedding
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import sparse_categorical_crossentropy

## Dataset

### Load Data (small, pre-tokenized vocabulary)


In [None]:

# Read the English corpus first and the French corpus second; later cells index
# both results by position, so they are treated as line-aligned sequences.
english_sentences, french_sentences = (
    helper.load_data(corpus_path)
    for corpus_path in ('data/small_vocab_en', 'data/small_vocab_fr')
)

print('Dataset Loaded')

Dataset Loaded


### Files


In [4]:
# Print the first two sentence pairs (numbered from 1) as a quick sanity check.
for sample_i in (0, 1):
    line_number = sample_i + 1
    print('small_vocab_en Line {}:  {}'.format(line_number, english_sentences[sample_i]))
    print('small_vocab_fr Line {}:  {}'.format(line_number, french_sentences[sample_i]))

small_vocab_en Line 1:  new jersey is sometimes quiet during autumn , and it is snowy in april .
small_vocab_fr Line 1:  new jersey est parfois calme pendant l' automne , et il est neigeux en avril .
small_vocab_en Line 2:  the united states is usually chilly during july , and it is usually freezing in november .
small_vocab_fr Line 2:  les états-unis est généralement froid en juillet , et il gèle habituellement en novembre .



### Vocabulary


In [5]:
# Count how often each whitespace-separated token occurs in each corpus.
english_words_counter = collections.Counter(
    word for sentence in english_sentences for word in sentence.split())
french_words_counter = collections.Counter(
    word for sentence in french_sentences for word in sentence.split())

# The total number of tokens equals the sum of the per-word counts, so the
# corpus does not have to be split a second time just to take a length.
print('{} English words.'.format(sum(english_words_counter.values())))
print('{} unique English words.'.format(len(english_words_counter)))
print('10 Most common words in the English dataset:')
# Joining the words directly (rather than indexing a zip result) also works
# when a corpus is empty.
print('"' + '" "'.join(word for word, _ in english_words_counter.most_common(10)) + '"')
print()
print('{} French words.'.format(sum(french_words_counter.values())))
print('{} unique French words.'.format(len(french_words_counter)))
print('10 Most common words in the French dataset:')
print('"' + '" "'.join(word for word, _ in french_words_counter.most_common(10)) + '"')

1823250 English words.
227 unique English words.
10 Most common words in the English dataset:
"is" "," "." "in" "it" "during" "the" "but" "and" "sometimes"

1961295 French words.
355 unique French words.
10 Most common words in the French dataset:
"est" "." "," "en" "il" "les" "mais" "et" "la" "parfois"


For comparison, _Alice's Adventures in Wonderland_ contains 2,766 unique words of a total of 15,500 words.
## Preprocess


Time to start preprocessing the data...
### Tokenize

In [6]:
def tokenize(x):
    """
    Turn sentences into sequences of integer word ids.
    :param x: List of sentences/strings to be tokenized
    :return: Tuple of (tokenized x data, tokenizer used to tokenize x)
    """
    # A fresh Tokenizer with default settings learns its vocabulary from x
    # and is then used to encode those same sentences.
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(x)
    sequences = tokenizer.texts_to_sequences(x)
    return sequences, tokenizer
tests.test_tokenize(tokenize)

# Demonstrate the tokenizer on three short sentences.
text_sentences = [
    'The quick brown fox jumps over the lazy dog .',
    'By Jove , my quick study of lexicography won a prize .',
    'This is a short sentence .',
]
text_tokenized, text_tokenizer = tokenize(text_sentences)
print(text_tokenizer.word_index)
print()
for sequence_number, (sent, token_sent) in enumerate(zip(text_sentences, text_tokenized), start=1):
    print('Sequence {} in x'.format(sequence_number))
    print('  Input:  {}'.format(sent))
    print('  Output: {}'.format(token_sent))

{'the': 1, 'quick': 2, 'a': 3, 'brown': 4, 'fox': 5, 'jumps': 6, 'over': 7, 'lazy': 8, 'dog': 9, 'by': 10, 'jove': 11, 'my': 12, 'study': 13, 'of': 14, 'lexicography': 15, 'won': 16, 'prize': 17, 'this': 18, 'is': 19, 'short': 20, 'sentence': 21}

Sequence 1 in x
  Input:  The quick brown fox jumps over the lazy dog .
  Output: [1, 2, 4, 5, 6, 7, 1, 8, 9]
Sequence 2 in x
  Input:  By Jove , my quick study of lexicography won a prize .
  Output: [10, 11, 12, 2, 13, 14, 15, 16, 3, 17]
Sequence 3 in x
  Input:  This is a short sentence .
  Output: [18, 19, 3, 20, 21]


### Padding

Make sure all the English sequences have the same length and all the French sequences have the same length by adding padding to the **end** of each sequence using Keras's [`pad_sequences`](https://keras.io/preprocessing/sequence/#pad_sequences) function.

In [7]:
def pad(x, length=None):
    """
    Pad every sequence in x at its end so that all sequences share one length.
    :param x: List of sequences.
    :param length: Length to pad the sequence to.  If None, use length of longest sequence in x.
    :return: Padded numpy array of sequences
    """
    # `is None` is the identity check intended here; `== None` can be
    # overridden by array-like values.
    if length is None:
        # default=0 keeps an empty x from raising inside max().
        length = max((len(sequence) for sequence in x), default=0)
    # padding='post' places the fill values after the tokens.
    return pad_sequences(x, maxlen=length, padding='post')
tests.test_pad(pad)

# Show each tokenized example sentence next to its padded version.
test_pad = pad(text_tokenized)
for sequence_number, (token_sent, pad_sent) in enumerate(zip(text_tokenized, test_pad), start=1):
    print('Sequence {} in x'.format(sequence_number))
    print('  Input:  {}'.format(np.array(token_sent)))
    print('  Output: {}'.format(pad_sent))

Sequence 1 in x
  Input:  [1 2 4 5 6 7 1 8 9]
  Output: [1 2 4 5 6 7 1 8 9 0]
Sequence 2 in x
  Input:  [10 11 12  2 13 14 15 16  3 17]
  Output: [10 11 12  2 13 14 15 16  3 17]
Sequence 3 in x
  Input:  [18 19  3 20 21]
  Output: [18 19  3 20 21  0  0  0  0  0]


### Preprocess Pipeline

In [None]:
def preprocess(x, y):
    """
    Tokenize and pad both sides of a parallel corpus.
    :param x: List of source sentences
    :param y: List of target sentences
    :return: Tuple of (padded x ids, padded y ids with a trailing axis of size 1, x tokenizer, y tokenizer)
    """
    x_sequences, x_tk = tokenize(x)
    y_sequences, y_tk = tokenize(y)

    padded_x = pad(x_sequences)
    padded_y = pad(y_sequences)

    # Keras's sparse_categorical_crossentropy function requires the labels to be in 3 dimensions,
    # so a trailing axis of size 1 is appended to the label array.
    labels = padded_y.reshape(padded_y.shape + (1,))

    return padded_x, labels, x_tk, y_tk

(preproc_english_sentences,
 preproc_french_sentences,
 english_tokenizer,
 french_tokenizer) = preprocess(english_sentences, french_sentences)

# Axis 1 of both arrays is the padded sequence length.
max_english_sequence_length = preproc_english_sentences.shape[1]
max_french_sequence_length = preproc_french_sentences.shape[1]
# One entry per known word; the padding id 0 is not counted here.
english_vocab_size = len(english_tokenizer.word_index)
french_vocab_size = len(french_tokenizer.word_index)

print('Data Preprocessed')
for label, value in (
        ("Max English sentence length:", max_english_sequence_length),
        ("Max French sentence length:", max_french_sequence_length),
        ("English vocabulary size:", english_vocab_size),
        ("French vocabulary size:", french_vocab_size)):
    print(label, value)

Data Preprocessed
Max English sentence length: 15
Max French sentence length: 21
English vocabulary size: 199
French vocabulary size: 344


## Models

- Model 1 is a simple RNN
- Model 2 is an RNN with Embedding
- Model 3 is a Bidirectional RNN
- Model 4 is an optional Encoder-Decoder RNN
- Model 5 is a custom model that combines Embedding, a Bidirectional RNN, and an Encoder-Decoder


### Ids Back to Text


In [None]:
def logits_to_text(logits, tokenizer):
    """
    Turn a matrix of per-step scores into a space-separated sentence.
    :param logits: 2-D array-like; the highest-scoring column of each row is taken as that step's word id
    :param tokenizer: Object whose `word_index` maps each word to its integer id
    :return: String of the decoded words, with id 0 rendered as '<PAD>'
    """
    id_to_word = dict((token_id, word) for word, token_id in tokenizer.word_index.items())
    # Id 0 is the padding slot, so it is given a visible placeholder.
    id_to_word[0] = '<PAD>'

    predicted_ids = np.argmax(logits, axis=1)
    words = []
    for token_id in predicted_ids:
        words.append(id_to_word[token_id])
    return ' '.join(words)

print('`logits_to_text` function loaded.')

`logits_to_text` function loaded.


### Model 1: RNN 

In [None]:
def simple_model(input_shape, output_sequence_length, english_vocab_size, french_vocab_size):
    """
    Build and compile a basic GRU model that emits a French-word distribution per time step.
    :param input_shape: Tuple of input shape; its first entry is dropped when declaring the model input
    :param output_sequence_length: Length of output sequence; also used as the number of GRU units
    :param english_vocab_size: Number of unique English words in the dataset (not used by this model)
    :param french_vocab_size: Number of unique French words in the dataset
    :return: Keras model built, but not trained
    """
    sequence_input = Input(shape=input_shape[1:])
    recurrent = GRU(units=output_sequence_length, return_sequences=True)
    # One softmax over the French vocabulary is produced for every time step.
    word_classifier = TimeDistributed(Dense(french_vocab_size, activation='softmax'))
    word_probabilities = word_classifier(recurrent(sequence_input))

    model = Model(inputs=sequence_input, outputs=word_probabilities)
    model.compile(
        optimizer=Adam(learning_rate=0.01),
        loss=sparse_categorical_crossentropy,
        metrics=['accuracy'],
    )
    return model


tests.test_simple_model(simple_model)

# The basic RNN reads one feature per time step: pad the English ids to the
# French sequence length and append a feature axis of size 1.
tmp_x = pad(preproc_english_sentences, max_french_sequence_length).reshape(
    (-1, preproc_french_sentences.shape[-2], 1))

# Train the neural network
simple_rnn_model = simple_model(
    input_shape=tmp_x.shape,
    output_sequence_length=max_french_sequence_length,
    # +1 to vocabulary sizes to account for the 0-index reserved for padding.
    english_vocab_size=english_vocab_size + 1,
    french_vocab_size=french_vocab_size + 1,
)
simple_rnn_model.fit(
    tmp_x,
    preproc_french_sentences,
    batch_size=1024,
    epochs=10,
    validation_split=0.2,
)
simple_rnn_model.summary()

# Decode the prediction for the first training sentence as a spot check.
first_prediction = simple_rnn_model.predict(tmp_x[:1])[0]
print(logits_to_text(first_prediction, french_tokenizer))

Epoch 1/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 113ms/step - accuracy: 0.4018 - loss: 3.5874 - val_accuracy: 0.4885 - val_loss: 2.3770
Epoch 2/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 121ms/step - accuracy: 0.4911 - loss: 2.2829 - val_accuracy: 0.5218 - val_loss: 2.0617
Epoch 3/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 130ms/step - accuracy: 0.5243 - loss: 2.0225 - val_accuracy: 0.5475 - val_loss: 1.8710
Epoch 4/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 106ms/step - accuracy: 0.5560 - loss: 1.8171 - val_accuracy: 0.5739 - val_loss: 1.6900
Epoch 5/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 104ms/step - accuracy: 0.5780 - loss: 1.6686 - val_accuracy: 0.5797 - val_loss: 1.6209
Epoch 6/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 105ms/step - accuracy: 0.5807 - loss: 1.6087 - val_accuracy: 0.5882 - val_loss: 1.5771
Epoch 7/10

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 180ms/step
new jersey est parfois parfois en en et il est est en en <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>


### Model 2: Embedding

In [13]:
def embed_model(input_shape, output_sequence_length, english_vocab_size, french_vocab_size):
    """
    Build and compile an RNN model that embeds the input word ids before the GRU.
    :param input_shape: Tuple of input shape; its first entry is dropped when declaring the model input
    :param output_sequence_length: Length of output sequence; also used as the embedding width and GRU units
    :param english_vocab_size: Number of unique English words in the dataset
    :param french_vocab_size: Number of unique French words in the dataset
    :return: Keras model built, but not trained
    """
    word_ids = Input(shape=input_shape[1:])

    embedder = Embedding(
        input_dim=english_vocab_size,
        output_dim=output_sequence_length,
        mask_zero=False,
    )
    recurrent = GRU(output_sequence_length, return_sequences=True)
    # One softmax over the French vocabulary is produced for every time step.
    word_classifier = TimeDistributed(Dense(french_vocab_size, activation='softmax'))

    word_probabilities = word_classifier(recurrent(embedder(word_ids)))

    model = Model(inputs=word_ids, outputs=word_probabilities)
    model.compile(
        optimizer=Adam(learning_rate=1e-2),
        loss=sparse_categorical_crossentropy,
        metrics=['accuracy'],
    )
    return model
tests.test_embed_model(embed_model)

# The embedding layer takes raw word ids, so the input stays two-dimensional:
# the English ids are padded to the French sequence length.
tmp_x = pad(preproc_english_sentences, preproc_french_sentences.shape[1]).reshape(
    (-1, preproc_french_sentences.shape[-2]))

# Train the neural network
embed_rnn_model = embed_model(
    input_shape=tmp_x.shape,
    output_sequence_length=max_french_sequence_length,
    english_vocab_size=english_vocab_size + 1,
    french_vocab_size=french_vocab_size + 1,
)
embed_rnn_model.summary()
embed_rnn_model.fit(
    tmp_x,
    preproc_french_sentences,
    batch_size=1024,
    epochs=10,
    validation_split=0.2,
)

# Decode the prediction for the first training sentence as a spot check.
first_prediction = embed_rnn_model.predict(tmp_x[:1])[0]
print(logits_to_text(first_prediction, french_tokenizer))

Epoch 1/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 132ms/step - accuracy: 0.4929 - loss: 3.0525 - val_accuracy: 0.6888 - val_loss: 1.2533
Epoch 2/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 113ms/step - accuracy: 0.7102 - loss: 1.1294 - val_accuracy: 0.7605 - val_loss: 0.8771
Epoch 3/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 120ms/step - accuracy: 0.7697 - loss: 0.8400 - val_accuracy: 0.7915 - val_loss: 0.7473
Epoch 4/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 112ms/step - accuracy: 0.7979 - loss: 0.7283 - val_accuracy: 0.8096 - val_loss: 0.6833
Epoch 5/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 135ms/step - accuracy: 0.8133 - loss: 0.6691 - val_accuracy: 0.8196 - val_loss: 0.6391
Epoch 6/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 125ms/step - accuracy: 0.8218 - loss: 0.6323 - val_accuracy: 0.8172 - val_loss: 0.6321
Epoch 7/10

### Model 3: Bidirectional RNNs (IMPLEMENTATION)

In [14]:
def bd_model(input_shape, output_sequence_length, english_vocab_size, french_vocab_size):
    """
    Build and compile a bidirectional GRU model.
    :param input_shape: Tuple of input shape; its first entry is dropped when declaring the model input
    :param output_sequence_length: Length of output sequence; also used as the GRU units per direction
    :param english_vocab_size: Number of unique English words in the dataset (not used by this model)
    :param french_vocab_size: Number of unique French words in the dataset
    :return: Keras model built, but not trained
    """
    sequence_input = Input(shape=input_shape[1:])
    # The GRU is wrapped so the sequence is read in both directions.
    bidirectional_gru = Bidirectional(GRU(output_sequence_length, return_sequences=True))
    per_step_softmax = TimeDistributed(Dense(french_vocab_size, activation='softmax'))

    model = Model(
        inputs=sequence_input,
        outputs=per_step_softmax(bidirectional_gru(sequence_input)),
    )
    model.compile(
        optimizer=Adam(learning_rate=0.01),
        loss=sparse_categorical_crossentropy,
        metrics=['accuracy'],
    )
    return model
    
tests.test_bd_model(bd_model)

# Pad the English ids to the French sequence length and append a feature axis of size 1.
tmp_x = pad(preproc_english_sentences, max_french_sequence_length).reshape(
    (-1, preproc_french_sentences.shape[-2], 1))

bd_mod = bd_model(
    input_shape=tmp_x.shape,
    output_sequence_length=max_french_sequence_length,
    english_vocab_size=english_vocab_size + 1,
    french_vocab_size=french_vocab_size + 1,
)
bd_mod.summary()
bd_mod.fit(
    tmp_x,
    preproc_french_sentences,
    batch_size=1024,
    epochs=10,
    validation_split=0.2,
)

# Decode the prediction for the first training sentence as a spot check.
first_prediction = bd_mod.predict(tmp_x[:1])[0]
print(logits_to_text(first_prediction, french_tokenizer))


Epoch 1/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 134ms/step - accuracy: 0.4423 - loss: 3.3749 - val_accuracy: 0.5665 - val_loss: 1.8985
Epoch 2/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 153ms/step - accuracy: 0.5847 - loss: 1.7551 - val_accuracy: 0.6136 - val_loss: 1.5230
Epoch 3/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 170ms/step - accuracy: 0.6175 - loss: 1.4917 - val_accuracy: 0.6279 - val_loss: 1.4232
Epoch 4/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 158ms/step - accuracy: 0.6291 - loss: 1.4074 - val_accuracy: 0.6391 - val_loss: 1.3643
Epoch 5/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 148ms/step - accuracy: 0.6389 - loss: 1.3545 - val_accuracy: 0.6453 - val_loss: 1.3121
Epoch 6/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 170ms/step - accuracy: 0.6467 - loss: 1.2997 - val_accuracy: 0.6510 - val_loss: 1.2639
Epoch 7/10

### Model 4: Encoder-Decoder 

In [15]:
def encdec_model(input_shape, output_sequence_length, english_vocab_size, french_vocab_size):
    """
    Build and compile an encoder-decoder GRU model.
    :param input_shape: Tuple of input shape; its first entry is dropped when declaring the model input
    :param output_sequence_length: Length of output sequence; also used as the encoder GRU units
    :param english_vocab_size: Number of unique English words in the dataset (not used by this model)
    :param french_vocab_size: Number of unique French words in the dataset
    :return: Keras model built, but not trained
    """
    latent_dim = 128

    encoder_input = Input(shape=input_shape[1:])

    # Layers are listed in the order they are applied: the first two encode the
    # whole input into one vector, the rest decode it into one output per step.
    pipeline = [
        GRU(output_sequence_length),
        Dense(latent_dim, activation='relu'),
        RepeatVector(output_sequence_length),
        GRU(latent_dim, return_sequences=True),
        TimeDistributed(Dense(french_vocab_size, activation='softmax')),
    ]
    tensor = encoder_input
    for layer in pipeline:
        tensor = layer(tensor)

    model = Model(inputs=encoder_input, outputs=tensor)
    model.compile(
        optimizer=Adam(learning_rate=1e-2),
        loss=sparse_categorical_crossentropy,
        metrics=['accuracy'],
    )
    return model

tests.test_encdec_model(encdec_model)

# Pad the English ids to the French sequence length and append a feature axis of size 1.
tmp_x = pad(preproc_english_sentences, max_french_sequence_length).reshape(
    (-1, preproc_french_sentences.shape[-2], 1))

ed_mod = encdec_model(
    input_shape=tmp_x.shape,
    output_sequence_length=max_french_sequence_length,
    english_vocab_size=english_vocab_size + 1,
    french_vocab_size=french_vocab_size + 1,
)
ed_mod.summary()
# Left as the final expression so the cell still displays the returned History object.
ed_mod.fit(
    tmp_x,
    preproc_french_sentences,
    batch_size=1024,
    epochs=10,
    validation_split=0.2,
)

Epoch 1/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 268ms/step - accuracy: 0.4233 - loss: 3.0983 - val_accuracy: 0.5220 - val_loss: 2.0680
Epoch 2/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 269ms/step - accuracy: 0.5319 - loss: 1.9741 - val_accuracy: 0.5553 - val_loss: 1.7385
Epoch 3/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 274ms/step - accuracy: 0.5529 - loss: 1.7354 - val_accuracy: 0.5867 - val_loss: 1.5289
Epoch 4/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 332ms/step - accuracy: 0.5812 - loss: 1.5632 - val_accuracy: 0.6102 - val_loss: 1.4066
Epoch 5/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 316ms/step - accuracy: 0.6008 - loss: 1.4457 - val_accuracy: 0.6184 - val_loss: 1.3493
Epoch 6/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 383ms/step - accuracy: 0.6180 - loss: 1.3517 - val_accuracy: 0.6233 - val_loss: 1.3241
Epoch 7/10

<keras.src.callbacks.history.History at 0x15177ba00>

Accuracy keeps increasing from the first epoch to the 10th epoch.

### Model 5: Custom
This model incorporates embedding, an encoder-decoder structure, and bidirectional RNNs into one model.

In [16]:
def model_final(input_shape, output_sequence_length, english_vocab_size, french_vocab_size):
    """
    Build and compile a model that combines embedding, encoder-decoder, and bidirectional RNN.
    :param input_shape: Tuple of input shape; its first entry is dropped when declaring the model input
    :param output_sequence_length: Length of output sequence; also used as the embedding width and encoder GRU units
    :param english_vocab_size: Number of unique English words in the dataset
    :param french_vocab_size: Number of unique French words in the dataset
    :return: Keras model built, but not trained
    """
    latent_dim = 128

    word_ids = Input(shape=input_shape[1:])

    # Layers are listed in the order they are applied: embed the ids, encode the
    # sentence into one vector, repeat it per output step, then decode.
    pipeline = [
        Embedding(input_dim=english_vocab_size,
                  output_dim=output_sequence_length,
                  mask_zero=False),
        Bidirectional(GRU(output_sequence_length)),
        Dense(latent_dim, activation='relu'),
        RepeatVector(output_sequence_length),
        Bidirectional(GRU(latent_dim, return_sequences=True)),
        TimeDistributed(Dense(french_vocab_size, activation='softmax')),
    ]
    tensor = word_ids
    for layer in pipeline:
        tensor = layer(tensor)

    model = Model(inputs=word_ids, outputs=tensor)
    model.compile(
        optimizer=Adam(learning_rate=0.01),
        loss=sparse_categorical_crossentropy,
        metrics=['accuracy'],
    )
    return model
tests.test_model_final(model_final)
print('Final Model Loaded')

Final Model Loaded


## Prediction

In [17]:
def final_predictions(x, y, x_tk, y_tk):
    """
    Train the final model and print its translations for two sample sentences.
    :param x: Preprocessed English data
    :param y: Preprocessed French data
    :param x_tk: English tokenizer
    :param y_tk: French tokenizer
    """
    # +1 on both vocabulary sizes leaves room for the padding id 0.
    english_vocab = len(x_tk.word_index) + 1
    french_vocab = len(y_tk.word_index) + 1
    model = model_final(x.shape, y.shape[1], english_vocab, french_vocab)
    model.summary()
    model.fit(x, y, batch_size=1024, epochs=10, validation_split=0.2)

    french_words = {word_id: word for word, word_id in y_tk.word_index.items()}
    french_words[0] = '<PAD>'

    def decode(prediction_rows):
        # Pick the highest-scoring word id at every output step.
        return ' '.join([french_words[np.argmax(row)] for row in prediction_rows])

    # Sample 1 is a hand-written sentence; sample 2 is the first training sentence.
    sample_ids = [x_tk.word_index[word] for word in 'he saw a old yellow truck'.split()]
    padded_sample = pad_sequences([sample_ids], maxlen=x.shape[-1], padding='post')
    sentences = np.array([padded_sample[0], x[0]])
    predictions = model.predict(sentences, batch_size=len(sentences))

    print('Sample 1:')
    print(decode(predictions[0]))
    print('Il a vu un vieux camion jaune')
    print('Sample 2:')
    print(decode(predictions[1]))
    # Reference translation: the label ids stored for the first training sentence.
    print(' '.join([french_words[np.max(label)] for label in y[0]]))


final_predictions(preproc_english_sentences, preproc_french_sentences, english_tokenizer, french_tokenizer)

Epoch 1/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 439ms/step - accuracy: 0.4517 - loss: 2.7068 - val_accuracy: 0.6715 - val_loss: 1.2214
Epoch 2/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 391ms/step - accuracy: 0.6901 - loss: 1.1151 - val_accuracy: 0.7422 - val_loss: 0.8770
Epoch 3/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 428ms/step - accuracy: 0.7640 - loss: 0.7902 - val_accuracy: 0.8161 - val_loss: 0.6099
Epoch 4/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 456ms/step - accuracy: 0.8296 - loss: 0.5592 - val_accuracy: 0.8601 - val_loss: 0.4493
Epoch 5/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 465ms/step - accuracy: 0.8629 - loss: 0.4383 - val_accuracy: 0.8856 - val_loss: 0.3737
Epoch 6/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 521ms/step - accuracy: 0.8921 - loss: 0.3492 - val_accuracy: 0.9149 - val_loss: 0.2789
Epoch 7/10

The final model achieved an accuracy of 92%.

In [18]:
# Display the padded English id matrix (0 is the padding id).
# NOTE(review): the last row in the output below is all zeros, which suggests the
# corpus ends with an empty line that becomes an all-padding example — confirm
# against helper.load_data and consider filtering it out before training.
preproc_english_sentences

array([[17, 23,  1, ..., 44,  0,  0],
       [ 5, 20, 21, ..., 51,  2, 45],
       [22,  1,  9, ..., 34,  0,  0],
       ...,
       [24,  1, 10, ..., 54,  0,  0],
       [ 5, 84,  1, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0]], dtype=int32)