# 9.1 Sequence to Sequence Models

9.1.2 Implementing a Basic Seq2Seq Model


In [None]:
!pip install tensorflow

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Sample data: five English sentences and their French counterparts, aligned by position.
input_texts = ["Hello.", "How are you?", "What is your name?", "Good morning.", "Good night."]
target_texts = ["Bonjour.", "Comment ça va?", "Quel est votre nom?", "Bonjour.", "Bonne nuit."]

# Source side: fit a tokenizer, encode every sentence, then right-pad to the longest one.
input_tokenizer = Tokenizer()
input_tokenizer.fit_on_texts(input_texts)
input_sequences = input_tokenizer.texts_to_sequences(input_texts)
input_maxlen = max(map(len, input_sequences))
# One extra slot on top of the learned vocabulary (the padded positions hold 0).
input_vocab_size = 1 + len(input_tokenizer.word_index)
input_sequences = pad_sequences(input_sequences, padding='post', maxlen=input_maxlen)

# Target side: the same steps with an independent tokenizer.
target_tokenizer = Tokenizer()
target_tokenizer.fit_on_texts(target_texts)
target_sequences = target_tokenizer.texts_to_sequences(target_texts)
target_maxlen = max(map(len, target_sequences))
target_vocab_size = 1 + len(target_tokenizer.word_index)
target_sequences = pad_sequences(target_sequences, padding='post', maxlen=target_maxlen)

# Shift the padded targets by one step: the decoder is fed every position except the
# last, and is trained to produce every position except the first.
target_input_sequences = target_sequences[:, :-1]
target_output_sequences = target_sequences[:, 1:]

# Build the Seq2Seq model
# Width shared by the embeddings and both LSTMs.
latent_dim = 256

# Encoder: embed the source tokens and keep only the LSTM's final (h, c) state.
encoder_inputs = Input(shape=(input_maxlen,))
encoder_embedding = tf.keras.layers.Embedding(
    input_dim=input_vocab_size, output_dim=latent_dim)(encoder_inputs)
encoder_lstm = LSTM(units=latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)
encoder_states = [state_h, state_c]

# Decoder: an LSTM started from the encoder state, followed by a softmax over the
# target vocabulary at every time step.
decoder_inputs = Input(shape=(None,))
decoder_embedding = tf.keras.layers.Embedding(
    input_dim=target_vocab_size, output_dim=latent_dim)(decoder_inputs)
decoder_lstm = LSTM(units=latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
decoder_dense = Dense(units=target_vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Training model: (source tokens, shifted target tokens) -> next-token distribution.
model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_outputs)

# Integer targets, hence the sparse variant of categorical cross-entropy.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

# Fit on the toy corpus; the last 20% of the rows are held out for validation.
model.fit(
    x=[input_sequences, target_input_sequences],
    y=target_output_sequences,
    batch_size=64,
    epochs=100,
    validation_split=0.2,
)

# Inference models for translation
# Encoder model: source tokens -> [h, c].
encoder_model = Model(inputs=encoder_inputs, outputs=encoder_states)

# Decoder model: the same decoder layers, but with the LSTM state exposed as explicit
# inputs and outputs so it can be stepped one token at a time.
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_outputs, state_h, state_c = decoder_lstm(
    decoder_embedding, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model(
    inputs=[decoder_inputs, *decoder_states_inputs],
    outputs=[decoder_outputs, *decoder_states],
)

# Function to decode the sequence
def decode_sequence(input_seq):
    """Greedily translate one encoded source sentence with the inference models.

    The encoder state seeds the decoder, which is then stepped one token at a
    time; at every step the most probable token is kept and fed back in.

    Parameters
    ----------
    input_seq : array-like
        Padded source token ids for a single sentence, in the form accepted
        by ``encoder_model.predict`` (the test loop passes a one-row slice of
        ``input_sequences``).

    Returns
    -------
    str
        The sampled target words, each preceded by one space, so a non-empty
        result starts with a space. The seed word is not part of the result.
        An empty string is returned when the first sampled token is padding.

    Raises
    ------
    KeyError
        If ``'bonjour'`` is missing from ``target_tokenizer.word_index``.
    """
    # Encode the input as state vectors; list() so it can be concatenated below.
    states_value = list(encoder_model.predict(input_seq, verbose=0))

    # Target sequence of length 1 holding the seed token. The training data has no
    # dedicated start marker, so a word from the target vocabulary is used instead.
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = target_tokenizer.word_index['bonjour']

    decoded_words = []
    # Bound the loop by the number of WORDS produced; target_maxlen is a token count.
    while len(decoded_words) < target_maxlen:
        output_tokens, h, c = decoder_model.predict(
            [target_seq] + states_value, verbose=0)

        # Greedy choice for the last (only) time step.
        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))

        # Index 0 is the padding value and has no entry in index_word; treat it as
        # the end of the sentence instead of failing on the lookup.
        if sampled_token_index == 0:
            break

        sampled_word = target_tokenizer.index_word[sampled_token_index]
        decoded_words.append(sampled_word)

        # Explicit stop token. With the default Tokenizer() filters punctuation is
        # stripped, so this is only expected to fire for tokenizers that keep '.'.
        if sampled_word == '.':
            break

        # Feed the sampled token and the new state back in for the next step.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index
        states_value = [h, c]

    return ''.join(' ' + word for word in decoded_words)

# Test the model: decode each of the five sample sentences and print the result
# next to its source text.
for seq_index in range(5):
    # Slice rather than index so the selected row keeps its leading batch dimension.
    input_seq = input_sequences[seq_index: seq_index + 1]
    decoded_sentence = decode_sequence(input_seq)
    print('-')
    print('Input sentence:', input_texts[seq_index])
    print('Decoded sentence:', decoded_sentence)

# 9.2 Attention Mechanisms

9.2.3 Implementing Attention Mechanisms in Seq2Seq Models

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding, Concatenate, TimeDistributed
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Sample data: English sentences paired by position with their French translations.
input_texts = ["Hello.", "How are you?", "What is your name?", "Good morning.", "Good night."]
target_texts = ["Bonjour.", "Comment ça va?", "Quel est votre nom?", "Bonjour.", "Bonne nuit."]

# Tokenize and pad the source sentences.
input_tokenizer = Tokenizer()
input_tokenizer.fit_on_texts(input_texts)
input_sequences = input_tokenizer.texts_to_sequences(input_texts)
input_maxlen = max(map(len, input_sequences))
# Vocabulary size leaves room for the 0 used at padded positions.
input_vocab_size = 1 + len(input_tokenizer.word_index)
input_sequences = pad_sequences(input_sequences, padding='post', maxlen=input_maxlen)

# Tokenize and pad the target sentences with their own tokenizer.
target_tokenizer = Tokenizer()
target_tokenizer.fit_on_texts(target_texts)
target_sequences = target_tokenizer.texts_to_sequences(target_texts)
target_maxlen = max(map(len, target_sequences))
target_vocab_size = 1 + len(target_tokenizer.word_index)
target_sequences = pad_sequences(target_sequences, padding='post', maxlen=target_maxlen)

# Decoder input drops the last position; decoder target drops the first, so each
# input step is paired with the token that follows it.
target_input_sequences = target_sequences[:, :-1]
target_output_sequences = target_sequences[:, 1:]

# Define the Seq2Seq model with Attention
latent_dim = 256

# Encoder: return_sequences=True keeps the output at every source position so the
# decoder can attend over them; the final (h, c) state still seeds the decoder.
encoder_inputs = Input(shape=(input_maxlen,))
encoder_embedding = Embedding(input_vocab_size, latent_dim)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)
encoder_states = [state_h, state_c]

# Decoder
decoder_inputs = Input(shape=(None,))
decoder_embedding = Embedding(target_vocab_size, latent_dim)(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)

# Attention mechanism: decoder outputs are the queries, encoder outputs the values.
# Keep a handle to the LAYER: the inference decoder below has to call it again, and
# the tensor it returns (`attention`) is not callable.
attention_layer = tf.keras.layers.Attention()
attention = attention_layer([decoder_outputs, encoder_outputs])
decoder_concat_input = Concatenate(axis=-1)([decoder_outputs, attention])

# Dense layer to generate predictions (softmax over the target vocabulary per step)
decoder_dense = TimeDistributed(Dense(target_vocab_size, activation='softmax'))
decoder_outputs = decoder_dense(decoder_concat_input)

# Define the model
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Compile the model (integer targets, hence the sparse loss)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

# Train the model
model.fit([input_sequences, target_input_sequences], target_output_sequences,
          batch_size=64, epochs=100, validation_split=0.2)

# Inference models for translation
# Encoder model: also returns the per-position outputs needed by the attention layer.
encoder_model = Model(encoder_inputs, [encoder_outputs] + encoder_states)

# Decoder model: the trained decoder layers are reused, with the LSTM state and the
# encoder outputs supplied as explicit inputs so decoding can run step by step.
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_hidden_state_input = Input(shape=(input_maxlen, latent_dim))
decoder_outputs, state_h, state_c = decoder_lstm(
    decoder_embedding, initial_state=decoder_states_inputs)
attention_output = attention_layer([decoder_outputs, decoder_hidden_state_input])
decoder_concat_input = Concatenate(axis=-1)([decoder_outputs, attention_output])
decoder_outputs = decoder_dense(decoder_concat_input)
decoder_model = Model(
    [decoder_inputs] + [decoder_hidden_state_input] + decoder_states_inputs,
    [decoder_outputs] + [state_h, state_c])

# Function to decode the sequence
def decode_sequence(input_seq):
    """Greedily translate one encoded source sentence with the attention models.

    The encoder provides its per-position outputs and final state; the decoder
    is then stepped one token at a time, attending over the encoder outputs and
    feeding its most probable token back in.

    Parameters
    ----------
    input_seq : array-like
        Padded source token ids for a single sentence, in the form accepted
        by ``encoder_model.predict`` (the test loop passes a one-row slice of
        ``input_sequences``).

    Returns
    -------
    str
        The sampled target words, each preceded by one space, so a non-empty
        result starts with a space. The seed word is not part of the result.
        An empty string is returned when the first sampled token is padding.

    Raises
    ------
    KeyError
        If ``'bonjour'`` is missing from ``target_tokenizer.word_index``.
    """
    # Encode the input: per-position outputs for attention plus the final state.
    encoder_outputs, state_h, state_c = encoder_model.predict(input_seq, verbose=0)
    states_value = [state_h, state_c]

    # Target sequence of length 1 holding the seed token. The training data has no
    # dedicated start marker, so a word from the target vocabulary is used instead.
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = target_tokenizer.word_index['bonjour']

    decoded_words = []
    # Bound the loop by the number of WORDS produced; target_maxlen is a token count.
    while len(decoded_words) < target_maxlen:
        output_tokens, h, c = decoder_model.predict(
            [target_seq] + [encoder_outputs] + states_value, verbose=0)

        # Greedy choice for the last (only) time step.
        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))

        # Index 0 is the padding value and has no entry in index_word; treat it as
        # the end of the sentence instead of failing on the lookup.
        if sampled_token_index == 0:
            break

        sampled_word = target_tokenizer.index_word[sampled_token_index]
        decoded_words.append(sampled_word)

        # Explicit stop token. With the default Tokenizer() filters punctuation is
        # stripped, so this is only expected to fire for tokenizers that keep '.'.
        if sampled_word == '.':
            break

        # Feed the sampled token and the new state back in for the next step.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index
        states_value = [h, c]

    return ''.join(' ' + word for word in decoded_words)

# Test the model: decode each of the five sample sentences and print the result
# next to its source text.
for seq_index in range(5):
    # Slice rather than index so the selected row keeps its leading batch dimension.
    input_seq = input_sequences[seq_index: seq_index + 1]
    decoded_sentence = decode_sequence(input_seq)
    print('-')
    print('Input sentence:', input_texts[seq_index])
    print('Decoded sentence:', decoded_sentence)

# 9.3 Transformer Models

9.3.3 Implementing Transformer Models in TensorFlow

In [None]:
!pip install transformers

In [45]:
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Load the pre-trained T5 tokenizer and model from the same checkpoint name
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Sample text: the leading "translate English to French:" is the task prefix.
text = (
    "translate English to French: Machine learning is a subset of artificial intelligence. "
    "It involves algorithms and statistical models to perform tasks without explicit instructions. "
    "Machine learning is widely used in various applications such as image recognition, "
    "natural language processing, and autonomous driving. "
    "It relies on patterns and inference instead of predefined rules."
)

# Encode to a PyTorch tensor of token ids, truncated to at most 512 tokens
inputs = tokenizer.encode(
    text,
    truncation=True,
    max_length=512,
    return_tensors="pt",
)

# Beam search (4 beams) for up to 150 output tokens, then decode the best sequence
output_ids = model.generate(
    inputs,
    num_beams=4,
    max_length=150,
    early_stopping=True,
)
translation = tokenizer.decode(output_ids[0], skip_special_tokens=True)

print("Translation:", translation, sep="\n")

Translation:
Dabei handelt es sich um eine Untergruppe der künstlichen Intelligenz, die Algorithmen und statistische Modelle zur Durchführung von Aufgaben ohne ausdrückliche Anweisungen einschließt.


9.3.4 Example: Visualizing Self-Attention Scores

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Function to visualize attention scores
def visualize_attention(model, tokenizer, text, layer=-1, head=0):
    """Plot one encoder self-attention head of ``model`` for ``text`` as a heatmap.

    Parameters
    ----------
    model
        Encoder-decoder model exposing ``get_encoder()``, e.g. the T5 model
        loaded earlier in this notebook.
    tokenizer
        Tokenizer matching ``model``; used to encode ``text`` and to label the axes.
    text : str
        Input sentence, including any task prefix.
    layer : int, optional
        Index of the encoder layer to plot. Defaults to the last layer.
    head : int, optional
        Index of the attention head to plot. Defaults to the first head.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    inputs = tokenizer.encode(text, return_tensors="pt", max_length=512, truncation=True)

    # generate() returns only the generated token ids unless return_dict_in_generate
    # is set, so it cannot be indexed for attention. A forward pass through the
    # encoder exposes the self-attention weights directly.
    encoder_outputs = model.get_encoder()(input_ids=inputs, output_attentions=True)
    attentions = encoder_outputs.attentions  # one entry per encoder layer

    # [layer] -> [first batch item] -> [head] leaves a (tokens x tokens) matrix.
    attention_matrix = attentions[layer][0][head].detach().numpy()

    # Token strings for the tick labels, so the heatmap can be read without a lookup.
    token_labels = tokenizer.convert_ids_to_tokens(inputs[0].tolist())

    # Plot the attention scores: rows are the attending (query) positions,
    # columns the attended-to (key) positions.
    plt.figure(figsize=(10, 8))
    sns.heatmap(attention_matrix, cmap="viridis",
                xticklabels=token_labels, yticklabels=token_labels)
    plt.title("Self-Attention Scores")
    plt.xlabel("Attended-to Tokens (keys)")
    plt.ylabel("Attending Tokens (queries)")
    plt.show()

# Draw the attention heatmap for one short prompt, using the model and tokenizer
# loaded earlier in the notebook.
sample_text = "translate English to French: How are you?"
visualize_attention(model=model, tokenizer=tokenizer, text=sample_text)

# Chapter-9 Assignments

Exercise 1: Sequence to Sequence (Seq2Seq) Model with TensorFlow

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Sample data: English sentences paired by position with their Spanish translations.
input_texts = [
    "Hello.",
    "How are you?",
    "What is your name?",
    "Good morning.",
    "Good night.",
]
target_texts = [
    "Hola.",
    "¿Cómo estás?",
    "¿Cuál es tu nombre?",
    "Buenos días.",
    "Buenas noches.",
]

# Source side: fit, encode, and right-pad to the longest sentence.
input_tokenizer = Tokenizer()
input_tokenizer.fit_on_texts(input_texts)
input_sequences = input_tokenizer.texts_to_sequences(input_texts)
input_maxlen = max(map(len, input_sequences))
# Vocabulary size leaves room for the 0 used at padded positions.
input_vocab_size = 1 + len(input_tokenizer.word_index)
input_sequences = pad_sequences(input_sequences, padding='post', maxlen=input_maxlen)

# Target side: the same steps with a separate tokenizer.
target_tokenizer = Tokenizer()
target_tokenizer.fit_on_texts(target_texts)
target_sequences = target_tokenizer.texts_to_sequences(target_texts)
target_maxlen = max(map(len, target_sequences))
target_vocab_size = 1 + len(target_tokenizer.word_index)
target_sequences = pad_sequences(target_sequences, padding='post', maxlen=target_maxlen)

# Decoder input drops the last position; decoder target drops the first.
target_input_sequences = target_sequences[:, :-1]
target_output_sequences = target_sequences[:, 1:]

# Build the Seq2Seq model
# Width shared by the embeddings and both LSTMs.
latent_dim = 256

# Encoder: embed the source tokens and keep only the LSTM's final (h, c) state.
encoder_inputs = Input(shape=(input_maxlen,))
encoder_embedding = Embedding(
    input_dim=input_vocab_size, output_dim=latent_dim)(encoder_inputs)
encoder_lstm = LSTM(units=latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)
encoder_states = [state_h, state_c]

# Decoder: an LSTM started from the encoder state, followed by a softmax over the
# target vocabulary at every time step.
decoder_inputs = Input(shape=(None,))
decoder_embedding = Embedding(
    input_dim=target_vocab_size, output_dim=latent_dim)(decoder_inputs)
decoder_lstm = LSTM(units=latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
decoder_dense = Dense(units=target_vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Training model: (source tokens, shifted target tokens) -> next-token distribution.
model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_outputs)

# Integer targets, hence the sparse variant of categorical cross-entropy.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

# Fit on the toy corpus; the last 20% of the rows are held out for validation.
model.fit(
    x=[input_sequences, target_input_sequences],
    y=target_output_sequences,
    batch_size=64,
    epochs=100,
    validation_split=0.2,
)

# Inference models for translation
# Encoder model: source tokens -> [h, c].
encoder_model = Model(inputs=encoder_inputs, outputs=encoder_states)

# Decoder model: the same decoder layers, with the LSTM state exposed as explicit
# inputs and outputs so it can be stepped one token at a time.
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_outputs, state_h, state_c = decoder_lstm(
    decoder_embedding, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model(
    inputs=[decoder_inputs, *decoder_states_inputs],
    outputs=[decoder_outputs, *decoder_states],
)

# Function to decode the sequence
def decode_sequence(input_seq):
    """Greedily translate one encoded source sentence with the inference models.

    The encoder state seeds the decoder, which is then stepped one token at a
    time; at every step the most probable token is kept and fed back in.

    Parameters
    ----------
    input_seq : array-like
        Padded source token ids for a single sentence, in the form accepted
        by ``encoder_model.predict``.

    Returns
    -------
    str
        The sampled target words joined by single spaces, without the seed
        word. Empty when the first sampled token is padding.

    Raises
    ------
    ValueError
        If the target vocabulary is empty, so no seed token can be chosen.
    """
    # Encode the input as state vectors; list() so it can be concatenated below.
    states_value = list(encoder_model.predict(input_seq, verbose=0))

    # Seed token: 'hola' when it is in the vocabulary, otherwise the first entry of
    # index_word. The training data has no dedicated start marker.
    start_token_index = target_tokenizer.word_index.get('hola')
    if start_token_index is None:
        if not target_tokenizer.index_word:
            raise ValueError("Target vocabulary is empty.")
        start_token_index = next(iter(target_tokenizer.index_word))

    # Target sequence of length 1 holding the seed token.
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = start_token_index

    decoded_words = []
    # Bound the loop by the number of WORDS produced. target_maxlen is a token
    # count, so comparing it with the character length of the sentence would end
    # decoding after roughly one word.
    while len(decoded_words) < target_maxlen:
        output_tokens, h, c = decoder_model.predict(
            [target_seq] + states_value, verbose=0)

        # Greedy choice for the last (only) time step.
        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))

        # Index 0 is the padding token: treat it as end of sentence and do not
        # add it to the output.
        if sampled_token_index == 0:
            break

        sampled_word = target_tokenizer.index_word[sampled_token_index]
        decoded_words.append(sampled_word)

        # Explicit stop token. With the default Tokenizer() filters punctuation is
        # stripped, so this is only expected to fire for tokenizers that keep '.'.
        if sampled_word == '.':
            break

        # Feed the sampled token and the new state back in for the next step.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index
        states_value = [h, c]

    return ' '.join(decoded_words)

# Test the model: decode each of the five sample sentences and print the result
# next to its source text.
for seq_index in range(5):
    # Slice rather than index so the selected row keeps its leading batch dimension.
    input_seq = input_sequences[seq_index: seq_index + 1]
    decoded_sentence = decode_sequence(input_seq)
    print('-')
    print('Input sentence:', input_texts[seq_index])
    print('Decoded sentence:', decoded_sentence)

**Explain the code snippet above in detail.**

___

**Type Your Response Below:**  

Exercise 2: Seq2Seq Model with Attention in TensorFlow

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding, Concatenate, TimeDistributed, Attention
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Sample data: English sentences paired by position with their Spanish translations.
input_texts = [
    "Hello.",
    "How are you?",
    "What is your name?",
    "Good morning.",
    "Good night.",
]
target_texts = [
    "Hola.",
    "¿Cómo estás?",
    "¿Cuál es tu nombre?",
    "Buenos días.",
    "Buenas noches.",
]

# Tokenize and pad the source sentences.
input_tokenizer = Tokenizer()
input_tokenizer.fit_on_texts(input_texts)
input_sequences = input_tokenizer.texts_to_sequences(input_texts)
input_maxlen = max(map(len, input_sequences))
# Vocabulary size leaves room for the 0 used at padded positions.
input_vocab_size = 1 + len(input_tokenizer.word_index)
input_sequences = pad_sequences(input_sequences, padding='post', maxlen=input_maxlen)

# Tokenize and pad the target sentences with their own tokenizer.
target_tokenizer = Tokenizer()
target_tokenizer.fit_on_texts(target_texts)
target_sequences = target_tokenizer.texts_to_sequences(target_texts)
target_maxlen = max(map(len, target_sequences))
target_vocab_size = 1 + len(target_tokenizer.word_index)
target_sequences = pad_sequences(target_sequences, padding='post', maxlen=target_maxlen)

# Decoder input drops the last position; decoder target drops the first, so each
# input step is paired with the token that follows it.
target_input_sequences = target_sequences[:, :-1]
target_output_sequences = target_sequences[:, 1:]

# Define the Seq2Seq model with Attention
# Width shared by the embeddings and both LSTMs.
latent_dim = 256

# Encoder: keep the output at every source position (for attention) as well as the
# final (h, c) state (to initialise the decoder).
encoder_inputs = Input(shape=(input_maxlen,))
encoder_embedding = Embedding(
    input_dim=input_vocab_size, output_dim=latent_dim)(encoder_inputs)
encoder_lstm = LSTM(units=latent_dim, return_sequences=True, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)
encoder_states = [state_h, state_c]

# Decoder LSTM, started from the encoder state.
decoder_inputs = Input(shape=(None,))
decoder_embedding = Embedding(
    input_dim=target_vocab_size, output_dim=latent_dim)(decoder_inputs)
decoder_lstm = LSTM(units=latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)

# Attention: decoder outputs are the queries, encoder outputs the values. The layer
# object is kept so the inference decoder below can call it again.
attention_layer = Attention()
attention_output = attention_layer([decoder_outputs, encoder_outputs])
decoder_concat_input = Concatenate(axis=-1)([decoder_outputs, attention_output])

# Per-step softmax over the target vocabulary.
decoder_dense = TimeDistributed(Dense(units=target_vocab_size, activation='softmax'))
decoder_outputs = decoder_dense(decoder_concat_input)

# Training model: (source tokens, shifted target tokens) -> next-token distribution.
model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_outputs)

# Integer targets, hence the sparse variant of categorical cross-entropy.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

# Fit on the toy corpus; the last 20% of the rows are held out for validation.
model.fit(
    x=[input_sequences, target_input_sequences],
    y=target_output_sequences,
    batch_size=64,
    epochs=100,
    validation_split=0.2,
)

# Inference models for translation
# Encoder model: source tokens -> [per-position outputs, h, c].
encoder_model = Model(inputs=encoder_inputs, outputs=[encoder_outputs, *encoder_states])

# Decoder model: the trained decoder layers are reused, with the LSTM state and the
# encoder outputs supplied as explicit inputs so decoding can run step by step.
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_hidden_state_input = Input(shape=(input_maxlen, latent_dim))
decoder_outputs, state_h, state_c = decoder_lstm(
    decoder_embedding, initial_state=decoder_states_inputs)
attention_output = attention_layer([decoder_outputs, decoder_hidden_state_input])
decoder_concat_input = Concatenate(axis=-1)([decoder_outputs, attention_output])
decoder_outputs = decoder_dense(decoder_concat_input)
decoder_model = Model(
    inputs=[decoder_inputs, decoder_hidden_state_input, *decoder_states_inputs],
    outputs=[decoder_outputs, state_h, state_c],
)

# Function to decode the sequence
def decode_sequence(input_seq):
    """Greedily translate one encoded source sentence with the attention models.

    The encoder provides its per-position outputs and final state; the decoder
    is then stepped one token at a time, attending over the encoder outputs and
    feeding its most probable token back in.

    Parameters
    ----------
    input_seq : array-like
        Padded source token ids for a single sentence, in the form accepted
        by ``encoder_model.predict``.

    Returns
    -------
    str
        The sampled target words joined by single spaces, without the seed
        word. Empty when the first sampled token is padding.

    Raises
    ------
    ValueError
        If the target vocabulary is empty, so no seed token can be chosen.
    """
    # Encode the input: per-position outputs for attention plus the final state.
    encoder_outputs, state_h, state_c = encoder_model.predict(input_seq, verbose=0)
    states_value = [state_h, state_c]

    # Seed token: 'hola' when it is in the vocabulary, otherwise the first entry of
    # index_word. The training data has no dedicated start marker.
    start_token_index = target_tokenizer.word_index.get('hola')
    if start_token_index is None:
        if not target_tokenizer.index_word:
            raise ValueError("Target vocabulary is empty.")
        start_token_index = next(iter(target_tokenizer.index_word))

    # Target sequence of length 1 holding the seed token.
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = start_token_index

    decoded_words = []
    # Bound the loop by the number of WORDS produced. target_maxlen is a token
    # count, so comparing it with the character length of the sentence would end
    # decoding after roughly one word.
    while len(decoded_words) < target_maxlen:
        output_tokens, h, c = decoder_model.predict(
            [target_seq] + [encoder_outputs] + states_value, verbose=0)

        # Greedy choice for the last (only) time step.
        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))

        # Index 0 is the padding token: treat it as end of sentence and do not
        # add it to the output.
        if sampled_token_index == 0:
            break

        sampled_word = target_tokenizer.index_word[sampled_token_index]
        decoded_words.append(sampled_word)

        # Explicit stop token. With the default Tokenizer() filters punctuation is
        # stripped, so this is only expected to fire for tokenizers that keep '.'.
        if sampled_word == '.':
            break

        # Feed the sampled token and the new state back in for the next step.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index
        states_value = [h, c]

    return ' '.join(decoded_words)

# Test the model: decode each of the five sample sentences and print the result
# next to its source text.
for seq_index in range(5):
    # Slice rather than index so the selected row keeps its leading batch dimension.
    input_seq = input_sequences[seq_index: seq_index + 1]
    decoded_sentence = decode_sequence(input_seq)
    print('-')
    print('Input sentence:', input_texts[seq_index])
    print('Decoded sentence:', decoded_sentence)

**Explain the code snippet above in detail.**

___

**Type Your Response Below:**  

Exercise 3: Transformer Model with T5

In [None]:
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Load the pre-trained T5 tokenizer and model from the same checkpoint name
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Sample text: the leading "translate English to Spanish:" is the task prefix.
# NOTE(review): the published T5 checkpoints are documented with English-to-German,
# -French and -Romanian translation prefixes; confirm that a Spanish prefix yields a
# usable translation with this checkpoint.
text = """translate English to Spanish: Machine learning is a subset of artificial intelligence.
It involves algorithms and statistical models to perform tasks without explicit instructions.
Machine learning is widely used in various applications such as image recognition,
natural language processing, and autonomous driving.
It relies on patterns and inference instead of predefined rules."""

# Encode to a PyTorch tensor of token ids, truncated to at most 512 tokens
inputs = tokenizer.encode(
    text,
    truncation=True,
    max_length=512,
    return_tensors="pt",
)

# Beam search (4 beams) for up to 150 output tokens, then decode the best sequence
output_ids = model.generate(
    inputs,
    num_beams=4,
    max_length=150,
    early_stopping=True,
)
translation = tokenizer.decode(output_ids[0], skip_special_tokens=True)

print("Translation:", translation, sep="\n")

**Explain the code snippet above in detail.**

___

**Type Your Response Below:**  

Exercise 5: Comparing Seq2Seq, Attention, and Transformer Models

In [None]:
# Translate one sentence with the Seq2Seq decoder.
# NOTE(review): `decode_sequence`, `input_tokenizer` and `input_maxlen` are looked up
# when this cell runs, so it uses whichever definitions were executed most recently.
# In a top-to-bottom run of this notebook that is the Exercise 2 (attention) version,
# not the Exercise 1 model — confirm this is the intended comparison.
input_seq = pad_sequences(
    input_tokenizer.texts_to_sequences(["How are you?"]),
    padding='post',
    maxlen=input_maxlen,
)
seq2seq_translation = decode_sequence(input_seq)
print("Seq2Seq Translation:", seq2seq_translation)

In [None]:
# Translate the same sentence with the attention decoder: encode and right-pad it
# exactly like the training inputs, then decode greedily.
input_seq = pad_sequences(
    input_tokenizer.texts_to_sequences(["How are you?"]),
    padding='post',
    maxlen=input_maxlen,
)
attention_translation = decode_sequence(input_seq)
print("Seq2Seq with Attention Translation:", attention_translation)

In [None]:
# Translate the same sentence with the T5 model and tokenizer loaded earlier.
text = "translate English to Spanish: How are you?"
inputs = tokenizer.encode(
    text,
    truncation=True,
    max_length=512,
    return_tensors="pt",
)
# Beam search (4 beams), at most 50 output tokens.
output_ids = model.generate(
    inputs,
    num_beams=4,
    max_length=50,
    early_stopping=True,
)
transformer_translation = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print("Transformer (T5) Translation:", transformer_translation)

**Provide your feedback on each model.**

___

**Type Your Response Below:**  