# Preliminary settings

Libraries imported.

In [54]:
import os
import re
import json
import pickle
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import efficientnet
from tensorflow.keras.layers import TextVectorization

from datetime import datetime
seed = int(round(datetime.now().timestamp()))
np.random.seed(seed)
tf.random.set_seed(seed)

Path to data.

In [55]:
dx = "/data/train/rxxch9vw59.2/"

Important constants.

In [56]:
# Path to the images
IMAGES_PATH = dx+"images"

# Desired image dimensions
IMAGE_SIZE = (299, 299)

# Vocabulary size
VOCAB_SIZE = 1661
# VOCAB_SIZE = len(final_vocab)

# Fixed length allowed for any sequence
SEQ_LENGTH = 15

# Dimension for the image embeddings and token embeddings
EMBED_DIM = 512

# Per-layer units in the feed-forward network
FF_DIM = 512

# Other training parameters
BATCH_SIZE = 32
EPOCHS = 1
AUTOTUNE = tf.data.AUTOTUNE

# Dataset creation

The image files are loaded. Each image is paired with two captions.
The pairs are shuffled and split into 20% test and 80% train set.

In [57]:
def load_captions_data(filename):
    """Loads captions (text) data and maps them to corresponding images.

    Args:
        filename: Path to the text file containing caption data.

    Returns:
        caption_mapping: Dictionary mapping image names and the corresponding captions
        text_data: List containing all the available captions
    """
    with open(filename, encoding="utf8") as caption_file:
        caption_data = json.load(caption_file)
        caption_mapping = {}
        text_data = []

        for item in caption_data:  # Iterate over the list of dictionaries
            img_name = os.path.join(IMAGES_PATH, item['filename'].strip())  # Access 'filename'
            caption_mapping[img_name] = ["<start> " + caption.strip() + " <end>" for caption in item['caption']]  # Access 'caption'
            text_data.extend(caption_mapping[img_name])

        return caption_mapping, text_data


def train_val_split(caption_data, train_size=0.8, shuffle=True):
    """Split the captioning dataset into train and validation sets.

    Args:
        caption_data (dict): Dictionary containing the mapped caption data
        train_size (float): Fraction of all the full dataset to use as training data
        shuffle (bool): Whether to shuffle the dataset before splitting

    Returns:
        Training and validation datasets as two separated dicts
    """

    # 1. Get the list of all image names
    all_images = list(caption_data.keys())

    # 2. Shuffle if necessary
    if shuffle:
        np.random.shuffle(all_images)

    # 3. Split into training and validation sets
    train_size = int(len(caption_data) * train_size)

    training_data = {
        img_name: caption_data[img_name] for img_name in all_images[:train_size]
    }
    validation_data = {
        img_name: caption_data[img_name] for img_name in all_images[train_size:]
    }

    # 4. Return the splits
    return training_data, validation_data


# Load the dataset
captions_mapping, text_data = load_captions_data(dx + "captions.json")

# Split the dataset into training and validation sets
train_data, valid_data = train_val_split(captions_mapping)
print("Number of training samples: ", len(train_data))
print("Number of validation samples: ", len(valid_data))

Number of training samples:  7323
Number of validation samples:  1831


In [58]:
# Import necessary libraries
from collections import Counter
import json
import matplotlib.pyplot as plt

# Load the captions data from the file
captions_path = dx + "captions.json"
with open(captions_path, 'r', encoding='utf-8') as f:
    captions_data = json.load(f)

# Flatten all captions into a single list
captions = []
for item in captions_data:
    captions.extend(item['caption'])

# Tokenize the captions by splitting on spaces
all_words = ' '.join(captions).split()

# Count the frequency of each word
word_freq = Counter(all_words)

# Visualize the word frequency distribution
plt.figure(figsize=(10,6))
plt.hist(list(word_freq.values()), bins=50)
plt.yscale('log')
plt.title('Word Frequency Distribution')
plt.xlabel('Word Count')
plt.ylabel('Frequency')
plt.show()

# Return the word frequencies
word_freq.most_common(10)  # Show the 10 most common words for reference


[('আছে।', 9264),
 ('একজন', 8775),
 ('একটি', 6006),
 ('পুরুষ', 4564),
 ('মানুষ', 4091),
 ('দাড়িয়ে', 3489),
 ('বসে', 3379),
 ('ও', 2350),
 ('দিয়ে', 2223),
 ('জন', 2201)]

In [59]:
min_word_freq = 5  # Threshold
frequent_words = {word: count for word, count in word_freq.items() if count >= min_word_freq}

# Check how many words have left after filtering
print(f"Number of frequent words: {len(frequent_words)}")

Number of frequent words: 1659


In [60]:
text_data

['<start> তিন জন মেয়ে মানুষ আছে। এক জন দাড়িয়ে আছে আর দুই জন বসে আছে। <end>',
 '<start> একটি হলুদ জামা পায়জামা পরা মহিলা দাড়িয়ে হাতে একটি বেত নিয়ে পিটানোর ভাব দেখাচ্ছে আর ছোট একটি মেয়ে পিছনে ব্যাগ নিয়ে বসে কাঁদছে। <end>',
 '<start> অনেক মেয়ে মানুষ বসে আছে। <end>',
 '<start> একটি নীল জামা পরা মহিলা একটি নীল ল্যাপটপ এর দিকে তাকিয়ে আছে এবং পিছনে  তার দিকে বসে শারি পরে তাকিয়ে আছে অনেকগুলো মহিলা। <end>',
 '<start> অনেক মানুষ একসাথে বসে কাজ করছে। <end>',
 '<start> ২ টি  ছোট ছেলে একজন শার্ট প্যান্ট  দাড়িয়ে চেয়ে আছে আরেকজন বসে গার্মেন্টস এ কাজ করছে নীল  শার্ট পরে তাদের পিছনে অনেকগুলো মহিলা বসে দাড়িয়ে কাজ করছে। <end>',
 '<start> ছয় জন মানুষ দাড়িয়ে আছে। <end>',
 '<start> ৬ জন মানুষ এলোমেলো দাড়িয়ে আছে, তাদের মাঝে ২ জন ছেলে ৪ জন পুরুষ,  তাদের একজন লুঙ্গী পরে দাড়িয়ে আছে। <end>',
 '<start> এক জন মেয়ে মানুষ মাথায় ঘোমটা দিয়ে কাজ করছে। মাটিতে বিভিন্ন রঙের মসলা আছে। <end>',
 '<start> একটি মহিলা হালকা পানির উপরে দাড়িয়ে আছে শারি পরে, মহিলার মুখ ডানদিকে ঘুরানো, পানির রং হলুদ দেখাচ্ছে। <end>',
 '<start> আবছা

Image augmentation is performed and the captions are vectorised.

In [61]:
def custom_standardization(input_string):
    lowercase = tf.strings.lower(input_string)
    return tf.strings.regex_replace(lowercase, "[%s]" % re.escape(strip_chars), "")


strip_chars = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
strip_chars = strip_chars.replace("<", "")
strip_chars = strip_chars.replace(">", "")

# List of words to include in the final vocabulary
final_vocab = list(frequent_words.keys())
print(f"Final Vocabulary Length: {len(final_vocab)}")

# Initialize the TextVectorization layer with the final vocabulary
vectorization = TextVectorization(
    max_tokens=VOCAB_SIZE,
    output_mode="int",
    output_sequence_length=SEQ_LENGTH,
    standardize=custom_standardization,
)
vectorization.set_vocabulary(final_vocab)

# Adapting the vectorization layer
# vectorization.adapt(text_data)

# Save the vocabulary to a JSON file after training
vocab = vectorization.get_vocabulary()
with open('/results/vocab.json', 'w', encoding='utf-8') as f:
    json.dump(vocab, f, ensure_ascii=False, indent=4)
print(f"Vocabulary saved with {len(vocab)} tokens.")

# Data augmentation for image data
image_augmentation = keras.Sequential(
    [
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.2),
        layers.RandomContrast(0.3),
    ]
)


Final Vocabulary Length: 1659
Vocabulary saved with 1661 tokens.


In [62]:
covered = sum(word_freq[word] for word in final_vocab)
total = sum(word_freq.values())
print(f"Coverage: {covered / total * 100:.2f}%")

Coverage: 95.69%


A function to expand the dataset such that each image is paired with each caption separately.

In [63]:
def flatten_dataset(caption_mapping):
    """Flattens the dataset by creating separate (image, caption) pairs for each caption of an image.

    Args:
        caption_mapping: Dictionary mapping image names to a list of captions.

    Returns:
        images: List of image paths (repeated for each caption).
        captions: List of corresponding captions (each caption as a string).
    """
    images = []
    captions = []

    # For each image, repeat the image path for each caption and append to the lists
    for img_path, caption_list in caption_mapping.items():
        for caption in caption_list:
            images.append(img_path)
            captions.append(caption)

    return images, captions


The train and test datasets are created using TensorFlow Dataset.

In [64]:
def decode_and_resize(img_path):
    img = tf.io.read_file(img_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, IMAGE_SIZE)
    img = tf.image.convert_image_dtype(img, tf.float32)
    return img


def process_input(img_path, caption):
    img = decode_and_resize(img_path)
    
    # Vectorize the caption
    captions_vectorized = vectorization(caption)
    
    return img, captions_vectorized


# Function for dataset creation
def make_dataset(images, captions):
    # Create a dataset from tensor slices
    dataset = tf.data.Dataset.from_tensor_slices((images, captions))
    dataset = dataset.shuffle(BATCH_SIZE * 8)
    
    # Map images and captions to processed inputs
    
    # Process images and vectorize captions
    dataset = dataset.map(lambda img, cap: process_input(img, cap), num_parallel_calls=AUTOTUNE)
    
    for img, cap in dataset.take(1):
        print(f"Image and caption shape: {img.shape, cap.shape} \n")
        
    # Batch the data
    dataset = dataset.batch(BATCH_SIZE).prefetch(AUTOTUNE)

    return dataset


# def make_dataset(images, captions):
#     print([len(i) for i in images[:5]])
#     print([len(i) for i in captions[:5]])

# Flatten the training data
train_images, train_captions = flatten_dataset(train_data)
valid_images, valid_captions = flatten_dataset(valid_data)

# Pass the list of images and the list of corresponding captions
# Create train and validation datasets
print("Training dataset \n")
train_dataset = make_dataset(train_images, train_captions)

print("Validation dataset \n")
valid_dataset = make_dataset(valid_images, valid_captions)

# Print the shapes of the datasets
for img_batch, cap_batch in train_dataset.take(1):
    print(f"Training batch image shape: {img_batch.shape}, caption shape: {cap_batch.shape}")
for img_batch, cap_batch in valid_dataset.take(1):
    print(f"Validation batch image shape: {img_batch.shape}, caption shape: {cap_batch.shape}")

Training dataset 

Image and caption shape: (TensorShape([299, 299, 3]), TensorShape([15])) 





Validation dataset 

Image and caption shape: (TensorShape([299, 299, 3]), TensorShape([15])) 





Training batch image shape: (32, 299, 299, 3), caption shape: (32, 15)




Validation batch image shape: (32, 299, 299, 3), caption shape: (32, 15)


# Transformer model creation

The Transformer uses the EfficientNetB0 CNN.
The Encoder and Decoder block and Positional Embedding layer is seperately created by inheriting the Layer class.
The Transformer model is created by compiling the layers and inheriting the Model class.

In [65]:
def get_cnn_model():
    base_model = efficientnet.EfficientNetB0(
        input_shape=(*IMAGE_SIZE, 3), include_top=False, weights="imagenet",
    )
    # We freeze our feature extractor
    base_model.trainable = False
    base_model_out = base_model.output
    
    # Reduce the sequence length using a pooling operation
    # Usign GlobalAveragePooling2D to reduce the spatial dimensions
    base_model_out = layers.GlobalAveragePooling2D()(base_model_out)
    
    # Optionally, project the output to match the embedding size
    base_model_out = layers.Dense(EMBED_DIM)(base_model_out)
    
    cnn_model = keras.models.Model(base_model.input, base_model_out)
    
    # Print CNN Model Summary
    print("\nCNN Model Summary:")
    cnn_model.summary()
    return cnn_model


class TransformerEncoderBlock(layers.Layer):
    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention_1 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim, dropout=0.0
        )
        self.layernorm_1 = layers.LayerNormalization() # Potential error source
        self.layernorm_2 = layers.LayerNormalization()
        self.dense_1 = layers.Dense(embed_dim, activation="relu")

    def call(self, inputs, training, mask=None):
        # Input shape
        print(f"Encoder Input Shape: {inputs.shape}")

        print(f"Encoder Input Shape before LayerNorm: {inputs.shape}")
        inputs = self.layernorm_1(inputs)
        print(f"Encoder Input Shape after LayerNorm: {inputs.shape}")
        
        inputs = self.dense_1(inputs)

        attention_output_1 = self.attention_1(
            query=inputs,
            value=inputs,
            key=inputs,
            attention_mask=None,
            training=training,
        )
        
        out_1 = self.layernorm_2(inputs + attention_output_1)

        # Output shape
        print(f"Encoder Output Shape: {out_1.shape}")
        return out_1


class PositionalEmbedding(layers.Layer):
    def __init__(self, sequence_length, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.token_embeddings = layers.Embedding(
            input_dim=vocab_size, output_dim=embed_dim
        )
        self.position_embeddings = layers.Embedding(
            input_dim=sequence_length, output_dim=embed_dim
        )
        self.sequence_length = sequence_length
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.embed_scale = tf.math.sqrt(tf.cast(embed_dim, tf.float32))

    def call(self, inputs):
        print(f"Positional Embedding Input Shape: {inputs.shape}")
        
        length = tf.shape(inputs)[-1]
        positions = tf.range(start=0, limit=length, delta=1)
        embedded_tokens = self.token_embeddings(inputs)
        embedded_tokens = embedded_tokens * self.embed_scale
        embedded_positions = self.position_embeddings(positions)
        
        # Embeddings shape
        print(f"Positional Embedding Output Shape: {embedded_tokens.shape}")
        return embedded_tokens + embedded_positions

    def compute_mask(self, inputs, mask=None):
        return tf.math.not_equal(inputs, 0)


class TransformerDecoderBlock(layers.Layer):
    def __init__(self, embed_dim, ff_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.ff_dim = ff_dim
        self.num_heads = num_heads
        self.attention_1 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim, dropout=0.1
        )
        self.attention_2 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim, dropout=0.1
        )
        self.ffn_layer_1 = layers.Dense(ff_dim, activation="relu")
        self.ffn_layer_2 = layers.Dense(embed_dim)

        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        self.layernorm_3 = layers.LayerNormalization()

        self.embedding = PositionalEmbedding(
            embed_dim=EMBED_DIM, sequence_length=SEQ_LENGTH, vocab_size=VOCAB_SIZE
        )
        self.out = layers.Dense(VOCAB_SIZE, activation="softmax")

        self.dropout_1 = layers.Dropout(0.3)
        self.dropout_2 = layers.Dropout(0.5)
        self.supports_masking = True

    def call(self, inputs, encoder_outputs, training, mask=None):
        print(f"Decoder Input Shape: {inputs.shape}")
        
        inputs = self.embedding(inputs)
        causal_mask = self.get_causal_attention_mask(inputs)

        if mask is not None:
            padding_mask = tf.cast(mask[:, :, tf.newaxis], dtype=tf.int32)
            combined_mask = tf.cast(mask[:, tf.newaxis, :], dtype=tf.int32)
            combined_mask = tf.minimum(combined_mask, causal_mask)

        attention_output_1 = self.attention_1(
            query=inputs,
            value=inputs,
            key=inputs,
            attention_mask=combined_mask,
            training=training,
        )
        out_1 = self.layernorm_1(inputs + attention_output_1)

        attention_output_2 = self.attention_2(
            query=out_1,
            value=encoder_outputs,
            key=encoder_outputs,
            attention_mask=padding_mask,
            training=training,
        )
        out_2 = self.layernorm_2(out_1 + attention_output_2)

        ffn_out = self.ffn_layer_1(out_2)
        ffn_out = self.dropout_1(ffn_out, training=training)
        ffn_out = self.ffn_layer_2(ffn_out)

        ffn_out = self.layernorm_3(ffn_out + out_2, training=training)
        ffn_out = self.dropout_2(ffn_out, training=training)
        preds = self.out(ffn_out)
        
        print(f"Decoder Output Shape: {preds.shape}")
        return preds

    def get_causal_attention_mask(self, inputs):
        input_shape = tf.shape(inputs)
        batch_size, sequence_length = input_shape[0], input_shape[1]
        i = tf.range(sequence_length)[:, tf.newaxis]
        j = tf.range(sequence_length)
        mask = tf.cast(i >= j, dtype="int32")
        mask = tf.reshape(mask, (1, input_shape[1], input_shape[1]))
        mult = tf.concat(
            [tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)],
            axis=0,
        )
        return tf.tile(mask, mult)


class ImageCaptioningModel(keras.Model):
    def __init__(
        self, cnn_model, encoder, decoder, num_captions_per_image=2, image_aug=None,
    ):
        super().__init__()
        self.cnn_model = cnn_model
        self.encoder = encoder
        self.decoder = decoder
        self.loss_tracker = keras.metrics.Mean(name="loss")
        self.acc_tracker = keras.metrics.Mean(name="accuracy")
        self.num_captions_per_image = num_captions_per_image
        self.image_aug = image_aug

    def calculate_loss(self, y_true, y_pred, mask):
        loss = self.loss(y_true, y_pred)
        mask = tf.cast(mask, dtype=loss.dtype)
        loss *= mask
        return tf.reduce_sum(loss) / tf.reduce_sum(mask)

    def calculate_accuracy(self, y_true, y_pred, mask):
        accuracy = tf.equal(y_true, tf.argmax(y_pred, axis=2))
        accuracy = tf.math.logical_and(mask, accuracy)
        accuracy = tf.cast(accuracy, dtype=tf.float32)
        mask = tf.cast(mask, dtype=tf.float32)
        return tf.reduce_sum(accuracy) / tf.reduce_sum(mask)

    def _compute_caption_loss_and_acc(self, img_embed, batch_seq, training=True):
        # print(f"Image Embedding Input Shape before passing to Encoder: {img_embed.shape}")
        
        # batch_seq = tf.expand_dims(batch_seq, axis=1)
        print(f"Batch Sequence Input Shape before slicing: {batch_seq.shape}")
        
        encoder_out = self.encoder(img_embed, training=training)
        batch_seq_inp = batch_seq[:, :-1] # Input sequence (without the last token)

        # print(f"Batch Sequence Input Shape before target sequence: {batch_seq_inp.shape}")
        
        batch_seq_true = batch_seq[:, 1:] # Target sequence (without the first token)
        mask = tf.math.not_equal(batch_seq_true, 0)
        
        print(f"Batch Sequence Input Shape: {batch_seq_inp.shape}")
        print(f"Batch Sequence True Shape: {batch_seq_true.shape}")
        
        batch_seq_pred = self.decoder(
            batch_seq_inp, encoder_out, training=training, mask=mask
        )

        print(f"Batch Sequence Predicted Shape: {batch_seq_pred.shape}")
        
        loss = self.calculate_loss(batch_seq_true, batch_seq_pred, mask)
        acc = self.calculate_accuracy(batch_seq_true, batch_seq_pred, mask)
        return loss, acc

    def train_step(self, batch_data):
        batch_img, batch_seq = batch_data

        # batch_seq = tf.expand_dims(batch_seq, axis=1)

        # print(f"Training Image Batch Shape before passing to CNN: {batch_img.shape}")
        total_loss = 0
        total_acc = 0
    
        if self.image_aug:
            batch_img = self.image_aug(batch_img)

        print(f"Training Image Batch Shape: {batch_img.shape}")
        print(f"Training Sequence Batch Shape: {batch_seq.shape}")
        
        # 1. Get image embeddings from CNN
        img_embed = self.cnn_model(batch_img)
        print(f"Image Embeddings Shape: {img_embed.shape}")

        # 2. Reshape CNN output to (batch_size, 1, embedding_dim)
        img_embed = tf.expand_dims(img_embed, axis=1)  # It should be (None, 1, 1024)

        print(f"Reshaped Image Embeddings for Encoder: {img_embed.shape}")
        
        # 3. Make sure batch_seq has 3 dimensions
        if batch_seq.shape.ndims == 2:
            # Reshape the sequence to have a third dimension (e.g., 1 caption per image)
            batch_seq = tf.expand_dims(batch_seq, axis=1)
        
        print(f"Updated Sequence Shape: {batch_seq.shape}")

        # 4. Accumulate loss and accuracy for each caption
        with tf.GradientTape() as tape:
            # Loop through each caption (batch_seq should be (batch_size, num_captions, sequence_length))
            num_captions_per_image = batch_seq.shape[1] # Extract the num_captions dimension
            
            for i in range(num_captions_per_image):
                loss, acc = self._compute_caption_loss_and_acc(
                    img_embed, batch_seq[:, i, :], training=True
                )
                total_loss += loss
                total_acc += acc

            # 5. Compute the mean loss and accuracy
            avg_loss = total_loss / tf.cast(num_captions_per_image, dtype=tf.float32)
            avg_acc = total_acc / tf.cast(num_captions_per_image, dtype=tf.float32)

        # Backpropagation
        # 6. Get the list of all the trainable weights
        train_vars = self.encoder.trainable_variables + self.decoder.trainable_variables
        
        # 7. Get the gradients (from the accumulated loss)
        grads = tape.gradient(avg_loss, train_vars)
    
        # 8. Update the trainable weights
        self.optimizer.apply_gradients(zip(grads, train_vars))
    
        # 9. Update the trackers
        self.loss_tracker.update_state(avg_loss)
        self.acc_tracker.update_state(avg_acc)
    
        # 10. Return the loss and accuracy values
        return {"loss": self.loss_tracker.result(), "acc": self.acc_tracker.result()}

    def test_step(self, batch_data):
        batch_img, batch_seq = batch_data

        # batch_seq = tf.expand_dims(batch_seq, axis=1)

        batch_loss = 0
        batch_acc = 0

        # 1. Get image embeddings
        img_embed = self.cnn_model(batch_img)

        # 2. Pass each of the captions one by one to the decoder
        # along with the encoder outputs and compute the loss as well as accuracy
        # for each caption.
        for i in range(self.num_captions_per_image):
            loss, acc = self._compute_caption_loss_and_acc(
                img_embed, batch_seq[:, i, :], training=False
            )

            # 3. Update batch loss and batch accuracy
            batch_loss += loss
            batch_acc += acc

        batch_acc /= float(self.num_captions_per_image)

        # 4. Update the trackers
        self.loss_tracker.update_state(batch_loss)
        self.acc_tracker.update_state(batch_acc)

        # 5. Return the loss and accuracy values
        return {"loss": self.loss_tracker.result(), "acc": self.acc_tracker.result()}

    @property
    def metrics(self):
        # We need to list our metrics here so the `reset_states()` can be
        # called automatically.
        return [self.loss_tracker, self.acc_tracker]


cnn_model = get_cnn_model()
encoder = TransformerEncoderBlock(embed_dim=EMBED_DIM, dense_dim=FF_DIM, num_heads=1)
decoder = TransformerDecoderBlock(embed_dim=EMBED_DIM, ff_dim=FF_DIM, num_heads=2)
caption_model = ImageCaptioningModel(
    cnn_model=cnn_model, encoder=encoder, decoder=decoder, image_aug=image_augmentation,
)


CNN Model Summary:
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 299, 299, 3) 0                                            
__________________________________________________________________________________________________
rescaling_1 (Rescaling)         (None, 299, 299, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
normalization_1 (Normalization) (None, 299, 299, 3)  7           rescaling_1[0][0]                
__________________________________________________________________________________________________
stem_conv_pad (ZeroPadding2D)   (None, 301, 301, 3)  0           normalization_1[0][0]            
________________________________________________________________________

The loss function and early stopping is defined. The model is compiled with the same.

In [66]:
# Define the loss function
cross_entropy = keras.losses.SparseCategoricalCrossentropy(
    from_logits=False, reduction="none"
)

# EarlyStopping criteria
early_stopping = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)


# Learning Rate Scheduler for the optimizer
class LRSchedule(keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, post_warmup_learning_rate, warmup_steps):
        super().__init__()
        self.post_warmup_learning_rate = post_warmup_learning_rate
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        global_step = tf.cast(step, tf.float32)
        warmup_steps = tf.cast(self.warmup_steps, tf.float32)
        warmup_progress = global_step / warmup_steps
        warmup_learning_rate = self.post_warmup_learning_rate * warmup_progress
        return tf.cond(
            global_step < warmup_steps,
            lambda: warmup_learning_rate,
            lambda: self.post_warmup_learning_rate,
        )


# Create a learning rate schedule
num_train_steps = len(train_dataset) * EPOCHS
num_warmup_steps = num_train_steps // 15
lr_schedule = LRSchedule(post_warmup_learning_rate=1e-4, warmup_steps=num_warmup_steps)

# Compile the model
caption_model.compile(optimizer=keras.optimizers.Adam(lr_schedule), loss=cross_entropy)

# Model training and testing

Sets the version and checks if the model has been previously trained or being trained for the first time. If saved weights are found, they're loaded.

In [67]:
mdx = '231005'  # Sets the version
tmpx = f'/results/Model_weights/{mdx}/Temp/'

# Check if the directory exists
if not os.path.exists(tmpx):
    print(f"Directory {tmpx} does not exist. Creating the directory.")
    os.makedirs(tmpx)  # Create the directory if it doesn't exist

# Now check for the files
try:
    weight_path = f'{tmpx}imgcap_{mdx}'
    fls = os.listdir(tmpx)

    # Look for specific weight files (like .index or .data-00000-of-00001)
    checkpoint_files = [f for f in fls if "imgcap_" in f]
    
    if len(checkpoint_files) > 0:
        print("Found saved weights, loading them now...")
        caption_model.load_weights(weight_path)
        print("Saved weights loaded successfully")
    else:
        print("No saved weights found, training from scratch")
except FileNotFoundError as e:
    print(f"Error: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")


Directory /results/Model_weights/231005/Temp/ does not exist. Creating the directory.
No saved weights found, training from scratch


Lookup dictionary created and output sequence length is set.

In [68]:
# Load the vocabulary during inference
with open('/results/vocab.json', 'r', encoding='utf-8') as f:
    vocab = json.load(f)

# Recreate the vectorization object and set its vocabulary
vectorization = TextVectorization(
    max_tokens=VOCAB_SIZE,
    output_mode="int",
    output_sequence_length=SEQ_LENGTH,
    standardize=custom_standardization,
)

# Set the vocabulary during inference
vectorization.set_vocabulary(vocab)

print(f"Vocabulary loaded with {len(vocab)} tokens.")

index_lookup = dict(zip(range(len(vocab)), vocab))
max_decoded_sentence_length = SEQ_LENGTH - 1
valid_images = list(valid_data.keys())

Vocabulary loaded with 1661 tokens.


The function reads an image from the given path. It uses the image to generate an caption. The same are displayed.

In [69]:
# Handle invalid token indices
def generate_caption(dt, ix):
    # Select a random image from the validation dataset
    sample_img = f'{dt}images/{ix}'

    # Read the image from the disk
    sample_img = decode_and_resize(sample_img)
    img = sample_img.numpy().clip(0, 255).astype(np.uint8)
    plt.axis('off')
    plt.imshow(img)
    plt.show()
    
    # Pass the image to the CNN
    img = tf.expand_dims(sample_img, 0)
    img = caption_model.cnn_model(img)

    # Expand dimensions to make it compatible with the encoder
    img = tf.expand_dims(img, 1)  # Adding sequence dimension, shape becomes (batch_size, 1, embed_dim)

    # Pass the image features to the Transformer encoder
    encoded_img = caption_model.encoder(img, training=False)

    # Generate the caption using the Transformer decoder
    decoded_caption = "<start> "
    for i in range(max_decoded_sentence_length):
        # Ensure decoded_caption is passed as a list of strings
        tokenized_caption = vectorization(tf.constant([decoded_caption]))[:, :-1]
        
        # Create mask for the tokenized caption
        mask = tf.math.not_equal(tokenized_caption, 0)

        predictions = caption_model.decoder(
            tokenized_caption, encoded_img, training=False, mask=mask
        )

        # Get the predicted token
        sampled_token_index = np.argmax(predictions[0, i, :])

        # Check if sampled_token_index is in the vocabulary range
        if sampled_token_index >= len(vocab):
            print(f"Warning: Token index {sampled_token_index} out of range")
            continue  # Skip to the next iteration if the token is out of range

        sampled_token = index_lookup[sampled_token_index]

        if sampled_token == "<end>":
            break

        decoded_caption += " " + sampled_token

    # Clean up the decoded caption
    decoded_caption = decoded_caption.replace("<start> ", "")
    decoded_caption = decoded_caption.replace(" <end>", "").strip()
    print("\nPredicted Caption: ", decoded_caption)
    print()


Test image path.

In [70]:
img_dt = "/data/test/rxxch9vw59-2/"
imgs = os.listdir(img_dt+"images")

Generates a caption for a random image among test images.

In [71]:
random_image = np.random.choice(imgs)

In [72]:
# generate_caption(img_dt,random_image)
generate_caption(img_dt,'1228.png')

Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)


Invalid limit will be ignored.
  plt.imshow(img)


Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)

The model is trained.

In [73]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Define checkpoint callback
checkpoint_cb = ModelCheckpoint(
    filepath=f'{tmpx}imgcap_{mdx}', 
    save_weights_only=True, 
    save_best_only=True
)

try:
    # Fit the model
    caption_model.fit(
        train_dataset,
        epochs=EPOCHS,
        validation_data=valid_dataset,
        callbacks=[early_stopping, checkpoint_cb],
    )
except ValueError as e:
    print(f"ValueError during model fitting: {e}")

Training Image Batch Shape: (None, 299, 299, 3)
Training Sequence Batch Shape: (None, None)
Image Embeddings Shape: (None, 512)
Reshaped Image Embeddings for Encoder: (None, 1, 512)
Updated Sequence Shape: (None, 1, None)
Batch Sequence Input Shape before slicing: (None, None)
Encoder Input Shape: (None, 1, 512)
Encoder Input Shape before LayerNorm: (None, 1, 512)
Encoder Input Shape after LayerNorm: (None, 1, 512)
Encoder Output Shape: (None, 1, 512)
Batch Sequence Input Shape: (None, None)
Batch Sequence True Shape: (None, None)
Decoder Input Shape: (None, None)
Positional Embedding Input Shape: (None, None)
Positional Embedding Output Shape: (None, None, 512)
Decoder Output Shape: (None, None, 1661)
Batch Sequence Predicted Shape: (None, None, 1661)
Training Image Batch Shape: (None, 299, 299, 3)
Training Sequence Batch Shape: (None, None)
Image Embeddings Shape: (None, 512)
Reshaped Image Embeddings for Encoder: (None, 1, 512)
Updated Sequence Shape: (None, 1, None)
Batch Sequence 



  1/458 [..............................] - ETA: 38:59 - loss: 7.8983 - acc: 0.0000e+00



  2/458 [..............................] - ETA: 5:25 - loss: 7.8973 - acc: 0.0000e+00 



  3/458 [..............................] - ETA: 5:33 - loss: 7.9308 - acc: 0.0000e+00



  5/458 [..............................] - ETA: 5:12 - loss: 7.8836 - acc: 2.8939e-04



  6/458 [..............................] - ETA: 4:54 - loss: 7.8784 - acc: 4.2369e-04



  7/458 [..............................] - ETA: 4:42 - loss: 7.8725 - acc: 4.9727e-04



 11/458 [..............................] - ETA: 4:20 - loss: 7.8277 - acc: 7.3024e-04



 12/458 [..............................] - ETA: 4:17 - loss: 7.8127 - acc: 7.6279e-04



 14/458 [..............................] - ETA: 4:10 - loss: 7.7823 - acc: 8.3106e-04



 15/458 [..............................] - ETA: 4:15 - loss: 7.7638 - acc: 8.9835e-04



 16/458 [>.............................] - ETA: 4:12 - loss: 7.7379 - acc: 9.9964e-04



 17/458 [>.............................] - ETA: 4:09 - loss: 7.7147 - acc: 0.0011    



 18/458 [>.............................] - ETA: 4:14 - loss: 7.6876 - acc: 0.0013



 20/458 [>.............................] - ETA: 4:10 - loss: 7.6496 - acc: 0.0018



 21/458 [>.............................] - ETA: 4:07 - loss: 7.6186 - acc: 0.0023



 22/458 [>.............................] - ETA: 4:09 - loss: 7.5841 - acc: 0.0029



 24/458 [>.............................] - ETA: 4:13 - loss: 7.5097 - acc: 0.0045



 27/458 [>.............................] - ETA: 4:13 - loss: 7.3976 - acc: 0.0076



 28/458 [>.............................] - ETA: 4:11 - loss: 7.3601 - acc: 0.0087



 29/458 [>.............................] - ETA: 4:11 - loss: 7.3245 - acc: 0.0099



 34/458 [=>............................] - ETA: 4:10 - loss: 7.1395 - acc: 0.0162



 39/458 [=>............................] - ETA: 4:04 - loss: 6.9753 - acc: 0.0229



 40/458 [=>............................] - ETA: 4:03 - loss: 6.9419 - acc: 0.0242



 41/458 [=>............................] - ETA: 4:01 - loss: 6.9143 - acc: 0.0255



 42/458 [=>............................] - ETA: 4:00 - loss: 6.8875 - acc: 0.0268



 43/458 [=>............................] - ETA: 3:58 - loss: 6.8560 - acc: 0.0281



 44/458 [=>............................] - ETA: 3:57 - loss: 6.8251 - acc: 0.0294



 47/458 [==>...........................] - ETA: 3:53 - loss: 6.7531 - acc: 0.0331



 48/458 [==>...........................] - ETA: 3:52 - loss: 6.7252 - acc: 0.0343



 50/458 [==>...........................] - ETA: 3:50 - loss: 6.6801 - acc: 0.0367



 51/458 [==>...........................] - ETA: 3:49 - loss: 6.6552 - acc: 0.0378



 53/458 [==>...........................] - ETA: 3:47 - loss: 6.6190 - acc: 0.0401



 54/458 [==>...........................] - ETA: 3:47 - loss: 6.5925 - acc: 0.0412



 55/458 [==>...........................] - ETA: 3:49 - loss: 6.5690 - acc: 0.0423



 57/458 [==>...........................] - ETA: 3:50 - loss: 6.5294 - acc: 0.0444



 59/458 [==>...........................] - ETA: 3:48 - loss: 6.4823 - acc: 0.0465



 62/458 [===>..........................] - ETA: 3:44 - loss: 6.4112 - acc: 0.0495



 66/458 [===>..........................] - ETA: 3:41 - loss: 6.3350 - acc: 0.0534



 67/458 [===>..........................] - ETA: 3:40 - loss: 6.3164 - acc: 0.0543



 69/458 [===>..........................] - ETA: 3:38 - loss: 6.2832 - acc: 0.0561



 73/458 [===>..........................] - ETA: 3:36 - loss: 6.2224 - acc: 0.0597



 75/458 [===>..........................] - ETA: 3:34 - loss: 6.1907 - acc: 0.0614



 76/458 [===>..........................] - ETA: 3:33 - loss: 6.1734 - acc: 0.0622



 80/458 [====>.........................] - ETA: 3:30 - loss: 6.1129 - acc: 0.0655



 83/458 [====>.........................] - ETA: 3:30 - loss: 6.0802 - acc: 0.0678



 84/458 [====>.........................] - ETA: 3:30 - loss: 6.0658 - acc: 0.0686



 88/458 [====>.........................] - ETA: 3:27 - loss: 6.0166 - acc: 0.0716



 93/458 [=====>........................] - ETA: 3:23 - loss: 5.9632 - acc: 0.0750



 94/458 [=====>........................] - ETA: 3:24 - loss: 5.9552 - acc: 0.0757



 95/458 [=====>........................] - ETA: 3:23 - loss: 5.9447 - acc: 0.0764



 98/458 [=====>........................] - ETA: 3:21 - loss: 5.9146 - acc: 0.0783



100/458 [=====>........................] - ETA: 3:20 - loss: 5.8937 - acc: 0.0796



101/458 [=====>........................] - ETA: 3:19 - loss: 5.8863 - acc: 0.0802



102/458 [=====>........................] - ETA: 3:19 - loss: 5.8736 - acc: 0.0809



104/458 [=====>........................] - ETA: 3:18 - loss: 5.8591 - acc: 0.0821
























































































































































































































































































































































































































































































































































































































































































































    File "/opt/conda/lib/python3.8/site-packages/keras/engine/training.py", line 1330, in test_function  *
        return step_function(self, iterator)
    File "/opt/conda/lib/python3.8/site-packages/keras/engine/training.py", line 1320, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/conda/lib/python3.8/site-packages/keras/engine/training.py", line 1313, in run_step  **
        outputs = model.test_step(data)
    File "/tmp/ipykernel_36/364036858.py", line 308, in test_step
        img_embed, batch_seq[:, i, :], training=False

    ValueError: Index out of range using input dim 2; input has only 2 dims for '{{node strided_slice}} = StridedSlice[Index=DT_INT32, T=DT_INT64, begin_mask=5, ellipsis_mask=0, end_mask=5, new_axis_mask=0, shrink_axis_mask=2](IteratorGetNext:1, strided_slice/stack, strided_slice/stack_1, strided_slice/stack_2)' with input shapes: [?,?], [3], [3], [3] and with computed input tensors: input[3] = <1 1 

Generate captions for all test images.

In [74]:
for i in imgs:
  generate_caption(img_dt, i)

Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)


Invalid limit will be ignored.
  plt.imshow(img)


Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: 



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positio



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (



Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)



Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)



Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positio



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)



Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positio



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positio



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positio



Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)



Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: 



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Posit



Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha



Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)



Encoder Input Shape: (1, 1, 512)
Encoder Input Shape before LayerNorm: (1, 1, 512)
Encoder Input Shape after LayerNorm: (1, 1, 512)
Encoder Output Shape: (1, 1, 512)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Shape: (1, 14)
Positional Embedding Output Shape: (1, 14, 512)
Decoder Output Shape: (1, 14, 1661)
Decoder Input Shape: (1, 14)
Positional Embedding Input Sha

Save the weights of the trained model.

In [75]:
# After training the model, save the weights
caption_model.save_weights(f'{tmpx}imgcap_{mdx}', save_format='tf')

Dump the vectorised vocabulary.

In [76]:
# Define the directory path
directory = f'/results/Vocab/{mdx}'

# Create the directory if it doesn't exist
os.makedirs(directory, exist_ok=True)

# Save the vocabulary using pickle
with open(f'{directory}/vocab_{mdx}', 'wb') as f:
    pickle.dump(vocab, f)