## Data Preparation

In [1]:
# Import necessary libraries
import pathlib
import random
import string
import re
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import TextVectorization
from tensorflow.keras.layers import Bidirectional,GRU,LSTM,Embedding
from tensorflow.keras.layers import Dense,MultiHeadAttention,LayerNormalization,Embedding,Dropout,Layer
from tensorflow.keras import Sequential,Input
from tensorflow.keras.callbacks import ModelCheckpoint

from nltk.translate.bleu_score import sentence_bleu

In [2]:
# Load the dataset from the provided file
text_file = '/kaggle/input/french/fra.txt'

In [3]:
with open(text_file) as f:
    lines = f.read().split("\n")[:-1]

# Prepare text pairs (English and French sentences)
text_pairs = []
for line in lines:
    english, french = line.split("\t")
    french = "[start] " + french + " [end]"
    text_pairs.append((english, french))

In [4]:
# Display a random text pair
import random
print(random.choice(text_pairs))

('You knew I was married.', "[start] Tu savais que j'étais marié. [end]")


In [5]:
# Shuffle text pairs and split into training, validation, and test datasets
import random
random.shuffle(text_pairs)
num_val_samples = int(0.15 * len(text_pairs))
num_train_samples = len(text_pairs) - 2 * num_val_samples
train_pairs = text_pairs[:num_train_samples]
val_pairs = text_pairs[num_train_samples:num_train_samples + num_val_samples]
test_pairs = text_pairs[num_train_samples + num_val_samples:]

In [6]:
# Define characters to strip from the text
strip_chars = string.punctuation + "¿"
strip_chars = strip_chars.replace("[", "")
strip_chars = strip_chars.replace("]", "")

In [7]:
# Custom standardization function for text preprocessing
def custom_standardization(input_string):
    lowercase = tf.strings.lower(input_string)
    return tf.strings.regex_replace(
        lowercase, f"[{re.escape(strip_chars)}]", "")

In [8]:
# Set parameters for text vectorization
vocab_size = 15000
sequence_length = 20

# Initialize TextVectorization layers for source and target languages
source_vectorization = TextVectorization(
    max_tokens=vocab_size,
    output_mode="int",
    output_sequence_length=sequence_length,
)
target_vectorization = TextVectorization(
    max_tokens=vocab_size,
    output_mode="int",
    output_sequence_length=sequence_length + 1,
    standardize=custom_standardization,
)

# Adapt the vectorization layers using the training data
train_english_texts = [pair[0] for pair in train_pairs]
train_french_texts = [pair[1] for pair in train_pairs]
source_vectorization.adapt(train_english_texts)
target_vectorization.adapt(train_french_texts)

In [9]:
# Set batch size for training
batch_size = 64

# Function to format dataset for training
def format_dataset(eng, fre):
    eng = source_vectorization(eng)
    fre = target_vectorization(fre)
    return ({
        "english": eng,
        "french": fre[:, :-1],
    }, fre[:, 1:])

# Function to create a TensorFlow dataset from text pairs
def make_dataset(pairs):
    eng_texts, fre_texts = zip(*pairs)
    eng_texts = list(eng_texts)
    fre_texts = list(fre_texts)
    dataset = tf.data.Dataset.from_tensor_slices((eng_texts, fre_texts))
    dataset = dataset.batch(batch_size)
    dataset = dataset.map(format_dataset, num_parallel_calls=4)
    return dataset.shuffle(2048).prefetch(16).cache()

# Create training and validation datasets
train_ds = make_dataset(train_pairs)
val_ds = make_dataset(val_pairs)

## LSTM Model

In [10]:
# Import necessary libraries
import random
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.callbacks import ModelCheckpoint
from nltk.translate.bleu_score import sentence_bleu

In [11]:
# Define the LSTM model architecture
embed_dim = 256
lstm_units = 512

encoder_inputs = keras.Input(shape=(None,), dtype="int64", name="english")
x = Embedding(vocab_size, embed_dim, mask_zero=True)(encoder_inputs)
encoder_outputs, state_h, state_c = LSTM(lstm_units, return_state=True)(x)
encoder_states = [state_h, state_c]

decoder_inputs = keras.Input(shape=(None,), dtype="int64", name="french")
x = Embedding(vocab_size, embed_dim, mask_zero=True)(decoder_inputs)
x = LSTM(lstm_units, return_sequences=True, return_state=True)(x, initial_state=encoder_states)
decoder_outputs = Dense(vocab_size, activation="softmax")(x[0])

lstm_model = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)

In [12]:
# Training parameters and setup
epochs = 10

checkpoint = ModelCheckpoint(filepath='lstm_language_translation_checkpoint.weights.h5', save_weights_only=True, verbose=1, monitor='val_accuracy')

lstm_model.compile(optimizer="rmsprop", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Train the model
lstm_model.fit(train_ds, epochs=epochs, callbacks=[checkpoint], validation_data=val_ds)

# Saving model weights
lstm_model.save_weights("lstm_translator.weights.h5")
load_status = lstm_model.load_weights("lstm_translator.weights.h5")

Epoch 1/10
[1m1828/1828[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - accuracy: 0.2294 - loss: 5.2631
Epoch 1: saving model to lstm_language_translation_checkpoint.weights.h5
[1m1828/1828[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 71ms/step - accuracy: 0.2294 - loss: 5.2628 - val_accuracy: 0.1566 - val_loss: 4.0031
Epoch 2/10
[1m1828/1828[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - accuracy: 0.1642 - loss: 3.8697
Epoch 2: saving model to lstm_language_translation_checkpoint.weights.h5
[1m1828/1828[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 71ms/step - accuracy: 0.1643 - loss: 3.8696 - val_accuracy: 0.1834 - val_loss: 3.3943
Epoch 3/10
[1m1828/1828[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - accuracy: 0.1885 - loss: 3.3268
Epoch 3: saving model to lstm_language_translation_checkpoint.weights.h5
[1m1828/1828[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 71ms/step - accuracy: 0.1885 -

In [13]:
# Evaluate the model on the test data
fra_vocab = target_vectorization.get_vocabulary()
fra_index_lookup = dict(zip(range(len(fra_vocab)), fra_vocab))
max_decoded_sentence_length = 20

def decode_sequence_lstm(input_sentence):
    tokenized_input_sentence = source_vectorization([input_sentence])
    decoded_sentence = "[start]"
    for i in range(max_decoded_sentence_length):
        tokenized_target_sentence = target_vectorization([decoded_sentence])[:, :-1]
        predictions = lstm_model([tokenized_input_sentence, tokenized_target_sentence])
        sampled_token_index = np.argmax(predictions[0, i, :])
        sampled_token = fra_index_lookup[sampled_token_index]
        decoded_sentence += " " + sampled_token
        if sampled_token == "[end]":
            break
    return decoded_sentence

test_eng_texts = [pair[0] for pair in test_pairs]
for _ in range(5):
    input_sentence = random.choice(test_eng_texts)
    print("-")
    print(input_sentence)
    print(decode_sequence_lstm(input_sentence))

-
She helped me out countless times.
[start] elle ma appelé plusieurs fois [end]
-
There's no need to call a doctor.
[start] il ne faut pas quil faut un médecin [end]
-
I mean you no harm.
[start] je ne veux pas que vous ne soyez pas mal [end]
-
Stop talking to me about Tom.
[start] arrêtez de parler avec tom [end]
-
We're less than halfway to the top of the mountain. Are you already tired?
[start] nous sommes à la moitié du chemin et je suis déjà à court de main [end]


In [14]:
# Evaluation using the BLEU score
test_eng_texts = [pair[0] for pair in test_pairs]
test_fra_texts = [pair[1] for pair in test_pairs]
score = 0
bleu = 0
for i in range(20):
    candidate = decode_sequence_lstm(test_eng_texts[i])
    reference = test_fra_texts[i].lower()
    print(candidate, reference)
    score = sentence_bleu(reference, candidate, weights=(1, 0, 0, 0))
    bleu += score
    print(f"Score: {score}")
print(f"\nBLEU score : {round(bleu, 2)}/20")

[start] jai une proposition [end] [start] j'ai une proposition. [end]
Score: 0.45454545454545453
[start] jai tous mes amis [end] [start] j'ai toutes les amies dont j'ai besoin. [end]
Score: 0.4838709677419355


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


[start] où êtesvous [end] [start] où étais-tu ? [end]
Score: 0.52
[start] nous nous [UNK] [end] [start] nous nous sommes amusées avec elles. [end]
Score: 0.41379310344827586
[start] je veux que tu [UNK] [end] [start] je veux que vous me parliez. [end]
Score: 0.4411764705882353
[start] tu peux nager [end] [start] tu peux nager. [end]
Score: 0.5185185185185185
[start] je ne savais pas que tom était mort [end] [start] je ne savais pas que tom était décédé. [end]
Score: 0.3877551020408163
[start] cest un homme à la personne [end] [start] c’est un homme de caractère. [end]
Score: 0.3658536585365854
[start] je ne veux même pas savoir ce que nous ne pouvons pas faire [end] [start] je ne veux même pas songer à ce qui pourrait se produire. [end]
Score: 0.2876712328767123
[start] comment sappelle [end] [start] comment s'appelle-t-il ? [end]
Score: 0.5
[start] voici ma chambre [end] [start] voici ma chambre ! [end]
Score: 0.5666666666666667
[start] les choses ne peuvent pas me [UNK] [end] [start]

## Transformer Model

-------------------------------------------------------------------------------------------------------------------------------

In [15]:
# Import necessary libraries
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Embedding, Dense, LayerNormalization, MultiHeadAttention, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint
from nltk.translate.bleu_score import sentence_bleu

In [16]:
# Define the Transformer Encoder class
class TransformerEncoder(layers.Layer):
    def __init__(self, embed_dim, num_heads, dense_dim, **kwargs):
        super(TransformerEncoder, self).__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.dense_dim = dense_dim
        self.attention = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.dense_proj = keras.Sequential([Dense(dense_dim, activation="relu"), Dense(embed_dim)])
        self.layernorm_1 = LayerNormalization()
        self.layernorm_2 = LayerNormalization()

    def call(self, inputs):
        attention_output = self.attention(inputs, inputs, inputs)
        proj_input = self.layernorm_1(inputs + attention_output)
        proj_output = self.dense_proj(proj_input)
        return self.layernorm_2(proj_input + proj_output)

In [17]:
# Define the Transformer Decoder class
class TransformerDecoder(layers.Layer):
    def __init__(self, embed_dim, num_heads, dense_dim, **kwargs):
        super(TransformerDecoder, self).__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.dense_dim = dense_dim
        self.attention_1 = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.attention_2 = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.dense_proj = keras.Sequential([Dense(dense_dim, activation="relu"), Dense(embed_dim)])
        self.layernorm_1 = LayerNormalization()
        self.layernorm_2 = LayerNormalization()

    def call(self, inputs, encoder_outputs):
        attention_output_1 = self.attention_1(inputs, inputs, inputs)
        attention_output_1 = self.layernorm_1(inputs + attention_output_1)
        attention_output_2 = self.attention_2(attention_output_1, encoder_outputs, encoder_outputs)
        attention_output_2 = self.layernorm_2(attention_output_1 + attention_output_2)
        proj_output = self.dense_proj(attention_output_2)
        return attention_output_2 + proj_output

In [18]:
# Define the Positional Embedding class
class PositionalEmbedding(layers.Layer):
    def __init__(self, sequence_length, embed_dim, **kwargs):
        super(PositionalEmbedding, self).__init__(**kwargs)
        self.embed_dim = embed_dim
        self.sequence_length = sequence_length

    def build(self, input_shape):
        # Initialize positional embedding
        self.pos_embedding = self.add_weight(
            name="pos_embedding",
            shape=(self.sequence_length, self.embed_dim),  # Shape based on sequence_length
            initializer="zeros",
            trainable=True
        )

    def call(self, inputs):
        length = tf.shape(inputs)[-2]  # Get the sequence length from inputs (batch size is ignored)
        return inputs + self.pos_embedding[:length, :]  # Slice pos_embedding based on sequence length


In [29]:
# Define the Positional Embedding class
class PositionalEmbedding(Layer):
    def __init__(self, sequence_length, input_dim, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.token_embeddings = Embedding(
            input_dim=input_dim, output_dim=output_dim)
        print(input_dim,output_dim)
        #intermediate = self.getPositionEncoding(seq_len=input_dim,d=vocab_size,n=output_dim)
        self.position_embeddings = Embedding(input_dim=input_dim, output_dim=output_dim)
        self.sequence_length = sequence_length
        self.input_dim = input_dim
        self.output_dim = output_dim

    def getPositionEncoding(self,seq_len, d, n = sequence_length):
        P = np.zeros((seq_len, d))
        for k in range(seq_len):
            for i in np.arange(int(d/2)):
                denominator = np.power(n, 2*i/d)
                P[k, 2*i] = np.sin(k/denominator)
                P[k, 2*i+1] = np.cos(k/denominator)
        tensor = tf.convert_to_tensor(P)
        print(tensor.shape)
        return tensor
    
    def call(self, inputs):
        length = tf.shape(inputs)[-1]
        positions = tf.range(start=0, limit=length, delta=1)
        embedded_tokens = self.token_embeddings(inputs)
        embedded_positions = self.position_embeddings(positions)
        return embedded_tokens + embedded_positions

    def compute_mask(self, inputs, mask=None):
        return tf.math.not_equal(inputs, 0)

    def get_config(self):
        config = super(PositionalEmbedding, self).get_config()
        config.update({
            "output_dim": self.output_dim,
            "sequence_length": self.sequence_length,
            "input_dim": self.input_dim,
        })
        return config

In [32]:
# Build the Transformer model architecture
embed_dim = 256
dense_dim = 2048
num_heads = 8

encoder_inputs = keras.Input(shape=(None,), dtype="int64", name="english")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(encoder_inputs)
encoder_outputs = TransformerEncoder(embed_dim, dense_dim, num_heads)(x)

decoder_inputs = keras.Input(shape=(None,), dtype="int64", name="french")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(decoder_inputs)
x = TransformerDecoder(embed_dim, dense_dim, num_heads)(x, encoder_outputs)
x = Dropout(0.5)(x)
decoder_outputs = Dense(vocab_size, activation="softmax")(x)
transformer = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)

15000 256


ValueError: A KerasTensor cannot be used as input to a TensorFlow function. A KerasTensor is a symbolic placeholder for a shape and dtype, used when constructing Keras Functional models or Keras Functions. You can only use it as input to a Keras layer or a Keras operation (from the namespaces `keras.layers` and `keras.operations`). You are likely doing something like:

```
x = Input(...)
...
tf_fn(x)  # Invalid.
```

What you should do instead is wrap `tf_fn` in a layer:

```
class MyLayer(Layer):
    def call(self, x):
        return tf_fn(x)

x = MyLayer()(x)
```


In [28]:
# Build the Transformer model architecture
embed_dim = 256
dense_dim = 2048
num_heads = 8

# Encoder
encoder_inputs = keras.Input(shape=(None,), dtype="int64", name="english")
x = Embedding(vocab_size, embed_dim)(encoder_inputs)
x = PositionalEmbedding(sequence_length, embed_dim)(x)
encoder_outputs = TransformerEncoder(embed_dim, num_heads, dense_dim)(x)

# Decoder
decoder_inputs = keras.Input(shape=(None,), dtype="int64", name="french")
x = Embedding(vocab_size, embed_dim)(decoder_inputs)
x = PositionalEmbedding(sequence_length, embed_dim)(x)
x = TransformerDecoder(embed_dim, num_heads, dense_dim)(x, encoder_outputs)
x = Dropout(0.5)(x)
decoder_outputs = Dense(vocab_size, activation="softmax")(x)

# Final model
transformer_model = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)

In [20]:
# Training parameters and setup
epochs = 10

checkpoint = ModelCheckpoint(filepath='transformer_language_translation_checkpoint.weights.h5', save_weights_only=True, verbose=1, monitor='val_accuracy')

transformer_model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Assuming `train_ds` and `val_ds` are pre-defined datasets
transformer_model.fit(train_ds, epochs=epochs, callbacks=[checkpoint], validation_data=val_ds)

# Saving model weights
transformer_model.save_weights("transformer_translator.weights.h5")
load_status = transformer_model.load_weights("transformer_translator.weights.h5")

Epoch 1/10
[1m1827/1828[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 27ms/step - accuracy: 0.8254 - loss: 1.2854
Epoch 1: saving model to transformer_language_translation_checkpoint.weights.h5
[1m1828/1828[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 32ms/step - accuracy: 0.8255 - loss: 1.2847 - val_accuracy: 0.9877 - val_loss: 0.0887
Epoch 2/10
[1m1827/1828[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 27ms/step - accuracy: 0.9831 - loss: 0.1130
Epoch 2: saving model to transformer_language_translation_checkpoint.weights.h5
[1m1828/1828[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 29ms/step - accuracy: 0.9831 - loss: 0.1129 - val_accuracy: 0.9952 - val_loss: 0.0314
Epoch 3/10
[1m1827/1828[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 27ms/step - accuracy: 0.9903 - loss: 0.0510
Epoch 3: saving model to transformer_language_translation_checkpoint.weights.h5
[1m1828/1828[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 29ms/step - 

In [21]:
# Evaluate the model on the test data
fra_vocab = target_vectorization.get_vocabulary()
fra_index_lookup = dict(zip(range(len(fra_vocab)), fra_vocab))
max_decoded_sentence_length = 20

def decode_sequence_transformer(input_sentence):
    tokenized_input_sentence = source_vectorization([input_sentence])
    decoded_sentence = "[start]"
    for i in range(max_decoded_sentence_length):
        tokenized_target_sentence = target_vectorization([decoded_sentence])[:, :-1]
        predictions = transformer_model([tokenized_input_sentence, tokenized_target_sentence])
        sampled_token_index = np.argmax(predictions[0, i, :])
        sampled_token = fra_index_lookup[sampled_token_index]
        decoded_sentence += " " + sampled_token
        if sampled_token == "[end]":
            break
    return decoded_sentence

In [22]:
# Evaluation using the BLEU score
score = 0
bleu = 0
for i in range(20):
    candidate = decode_sequence_transformer(test_eng_texts[i])
    reference = test_fra_texts[i].lower()
    score = sentence_bleu(reference, candidate, weights=(1, 0, 0, 0))
    bleu += score
    print(f"Score: {score}")
print(f"\nBLEU score : {round(bleu, 2)}/20")

Score: 0.25925925925925924
Score: 0.25925925925925924
Score: 0.25925925925925924
Score: 0.25925925925925924
Score: 0.25925925925925924
Score: 0.25925925925925924
Score: 0.25925925925925924
Score: 0.25925925925925924
Score: 0.25925925925925924
Score: 0.25925925925925924
Score: 0.25925925925925924
Score: 0.25925925925925924
Score: 0.25925925925925924
Score: 0.25925925925925924
Score: 0.25925925925925924
Score: 0.25925925925925924
Score: 0.25925925925925924
Score: 0.25925925925925924
Score: 0.25925925925925924
Score: 0.25925925925925924

BLEU score : 5.19/20


--------------------------------------------------------------------------------------------------------------

In [36]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, MultiHeadAttention, Dropout, LayerNormalization, Dense
from tensorflow.keras.models import Model

# Parameters
embed_dim = 256
num_heads = 8
dense_dim = 512  # Dimension for the dense layer
num_layers = 2  # Number of transformer layers
dropout_rate = 0.1

# Input layer
inputs = Input(shape=(None,), dtype="int64", name="inputs")

# Embedding layer
embedding = Embedding(vocab_size, embed_dim)(inputs)

# Positional Encoding using tf.keras.layers.Embedding for simplicity
pos_encoding = Embedding(input_dim=sequence_length, output_dim=embed_dim)(tf.range(sequence_length))

# Add positional encoding to the embedding
x = embedding + pos_encoding

# Add transformer layers
for _ in range(num_layers):
    # Multi-Head Attention
    attn_output = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)(x, x)
    attn_output = Dropout(dropout_rate)(attn_output)
    x = LayerNormalization(epsilon=1e-6)(x + attn_output)

    # Feed-forward network
    ffn_output = Dense(dense_dim, activation="relu")(x)
    ffn_output = Dense(embed_dim)(ffn_output)
    ffn_output = Dropout(dropout_rate)(ffn_output)
    x = LayerNormalization(epsilon=1e-6)(x + ffn_output)

# Final output layer
outputs = Dense(vocab_size, activation="softmax")(x)

# Build the model
model = Model(inputs, outputs)

# Summary of the model
model.summary()


In [37]:
model.compile(optimizer=tf.keras.optimizers.Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=64, epochs=10)
predictions = model.predict(X_test)


NameError: name 'X_train' is not defined

## Pre-trained model from Hugging Face

In [23]:
from transformers import MarianMTModel, MarianTokenizer

# Load pretrained model and tokenizer for English to French translation
model_name = "Helsinki-NLP/opus-mt-en-fr"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/778k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.34M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.42k [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/301M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

In [24]:
def translate_with_huggingface(input_sentence):
    # Tokenize the input sentence
    tokenized_input_sentence = tokenizer.encode(input_sentence, return_tensors="pt", padding=True, truncation=True)
    
    # Get the prediction from the model
    translated = model.generate(tokenized_input_sentence, max_length=50, num_beams=4, early_stopping=True)
    
    # Decode the translated sentence
    translated_sentence = tokenizer.decode(translated[0], skip_special_tokens=True)
    return translated_sentence

In [25]:
# Test with both models
test_eng_texts = [pair[0] for pair in test_pairs]
for input_sentence in test_eng_texts[:5]:  # Adjust to the desired number of sentences
    print("Original English sentence:", input_sentence)
    
    # Translate with LSTM model
    LSTM_translation = decode_sequence_lstm(input_sentence)
    print("LSTM Translation:", LSTM_translation)

    # Translate with Transformer model
    transformer_translation = decode_sequence_transformer(input_sentence)
    print("Transformer Translation:", transformer_translation)
    
    # Translate with Hugging Face model
    hf_translation = translate_with_huggingface(input_sentence)
    print("Hugging Face Translation:", hf_translation)
    print("-" * 50)

Original English sentence: I have a proposal.
LSTM Translation: [start] jai une proposition [end]
Transformer Translation: [start]                    
Hugging Face Translation: J'ai une proposition.
--------------------------------------------------
Original English sentence: I've got all the friends I need.
LSTM Translation: [start] jai tous mes amis [end]
Transformer Translation: [start]                    
Hugging Face Translation: J'ai tous les amis dont j'ai besoin.
--------------------------------------------------
Original English sentence: Where were you?
LSTM Translation: [start] où êtesvous [end]
Transformer Translation: [start]                    
Hugging Face Translation: Où étais-tu?
--------------------------------------------------
Original English sentence: We had fun with them.
LSTM Translation: [start] nous nous [UNK] [end]
Transformer Translation: [start]                    
Hugging Face Translation: On s'est amusés avec eux.
-----------------------------------------