# Preliminary settings

Libraries imported.

In [224]:
import os
import re
import json
import pickle
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import efficientnet
from tensorflow.keras.layers import TextVectorization

from datetime import datetime
seed = int(round(datetime.now().timestamp()))
np.random.seed(seed)
tf.random.set_seed(seed)

Path to data.

In [225]:
dx = "/data/train/rxxch9vw59.2/"

Important constants.

In [226]:
# Path to the images
IMAGES_PATH = dx+"images"

# Desired image dimensions
IMAGE_SIZE = (224, 224)

# Vocabulary size
VOCAB_SIZE = 6000
# later to be updated to VOCAB_SIZE = len(filtered_words) + 2

# Fixed length allowed for any sequence
SEQ_LENGTH = 12 # For slightly longer captions

# Dimension for the image embeddings and token embeddings
EMBED_DIM = 256

# Per-layer units in the feed-forward network
FF_DIM = 256

# Other training parameters
BATCH_SIZE = 25
EPOCHS = 15
AUTOTUNE = tf.data.AUTOTUNE

# Dataset creation

The image files are loaded. Each image is paired with two captions.
The pairs are shuffled and split into 20% test and 80% train set.

In [227]:
def load_captions_data(filename):
    """Loads captions (text) data and maps them to corresponding images.

    Args:
        filename: Path to the text file containing caption data.

    Returns:
        caption_mapping: Dictionary mapping image names and the corresponding captions
        text_data: List containing all the available captions
    """
    with open(filename, encoding="utf8") as caption_file:
        caption_data = json.load(caption_file)
        caption_mapping = {}
        text_data = []

        for item in caption_data:  # Iterate over the list of dictionaries
            img_name = os.path.join(IMAGES_PATH, item['filename'].strip())  # Access 'filename'
            caption_mapping[img_name] = ["<start> " + caption.strip() + " <end>" for caption in item['caption']]  # Access 'caption'
            text_data.extend(caption_mapping[img_name])

        return caption_mapping, text_data


def train_val_split(caption_data, train_size=0.8, shuffle=True):
    """Split the captioning dataset into train and validation sets.

    Args:
        caption_data (dict): Dictionary containing the mapped caption data
        train_size (float): Fraction of all the full dataset to use as training data
        shuffle (bool): Whether to shuffle the dataset before splitting

    Returns:
        Training and validation datasets as two separated dicts
    """

    # 1. Get the list of all image names
    all_images = list(caption_data.keys())

    # 2. Shuffle if necessary
    if shuffle:
        np.random.shuffle(all_images)

    # 3. Split into training and validation sets
    train_size = int(len(caption_data) * train_size)

    training_data = {
        img_name: caption_data[img_name] for img_name in all_images[:train_size]
    }
    validation_data = {
        img_name: caption_data[img_name] for img_name in all_images[train_size:]
    }

    # 4. Return the splits
    return training_data, validation_data


# Load the dataset
captions_mapping, text_data = load_captions_data(dx + "captions.json")

# Split the dataset into training and validation sets
train_data, valid_data = train_val_split(captions_mapping)
print("Number of training samples: ", len(train_data))
print("Number of validation samples: ", len(valid_data))

Number of training samples:  7323
Number of validation samples:  1831


In [228]:
# Import necessary libraries
from collections import Counter
import json
import matplotlib.pyplot as plt

# Load the captions data from the file
captions_path = dx + "captions.json"
with open(captions_path, 'r', encoding='utf-8') as f:
    captions_data = json.load(f)

# Flatten all captions into a single list
captions = []
for item in captions_data:
    captions.extend(item['caption'])

# Tokenize the captions by splitting on spaces
all_words = ' '.join(captions).split()

# Count the frequency of each word
word_freq = Counter(all_words)

# Visualize the word frequency distribution
plt.figure(figsize=(10,6))
plt.hist(list(word_freq.values()), bins=50)
plt.yscale('log')
plt.title('Word Frequency Distribution')
plt.xlabel('Word Count')
plt.ylabel('Frequency')

# Save the visualization as an image
visualization_path = '/results/' + "word_frequency_distribution.png"
plt.savefig(visualization_path)
plt.close()
print(f"Visualization saved at {visualization_path}")

# Save word frequencies to a CSV file
csv_path = '/results/' + "word_frequencies.csv"
with open(csv_path, 'w', encoding='utf-8') as f:
    f.write("Word,Frequency\n")
    for word, freq in sorted(word_freq.items(), key=lambda item: item[1], reverse=True):
        f.write(f"{word},{freq}\n")
print(f"Word frequencies saved at {csv_path}")

# Show the 10 most common words for reference
print("Top 10 most common words:", word_freq.most_common(10))


Visualization saved at /results/word_frequency_distribution.png
Word frequencies saved at /results/word_frequencies.csv
Top 10 most common words: [('আছে।', 9264), ('একজন', 8775), ('একটি', 6006), ('পুরুষ', 4564), ('মানুষ', 4091), ('দাড়িয়ে', 3489), ('বসে', 3379), ('ও', 2350), ('দিয়ে', 2223), ('জন', 2201)]


In [229]:
rare_tokens = [word for word, freq in word_freq.items() if freq == 1]
print(f"Rare tokens ({len(rare_tokens)}):", rare_tokens[:10])


Rare tokens (2840): ['পিটানোর', 'ঘুরানো,', 'গুলোর', 'ঢুল', 'অফ', 'হুয়াট', 'প্রনে', 'ঝার', 'বসতি', 'বানিয়ে,']


In [230]:
# Token frequency adjustment for balanced coverage
rare_threshold = 2  # Minimum count for inclusion
frequent_threshold = float('inf')  # No upper limit on frequency
filtered_words = {word: freq for word, freq in word_freq.items()
                  if rare_threshold <= freq <= frequent_threshold}

# Check how many words have left after filtering
print(f"Number of filtered words: {len(filtered_words)}")

# Determine the vocabulary size with a buffer for special tokens
VOCAB_SIZE = len(filtered_words) + 2 # Adding 2 for `""` and `[UNK]`
print(f"Balanced Vocabulary Size: {VOCAB_SIZE}")

Number of filtered words: 3136
Balanced Vocabulary Size: 3138


In [231]:
text_data[:3]

['<start> তিন জন মেয়ে মানুষ আছে। এক জন দাড়িয়ে আছে আর দুই জন বসে আছে। <end>',
 '<start> একটি হলুদ জামা পায়জামা পরা মহিলা দাড়িয়ে হাতে একটি বেত নিয়ে পিটানোর ভাব দেখাচ্ছে আর ছোট একটি মেয়ে পিছনে ব্যাগ নিয়ে বসে কাঁদছে। <end>',
 '<start> অনেক মেয়ে মানুষ বসে আছে। <end>']

Image augmentation is performed and the captions are vectorised.

In [232]:
def custom_standardization(input_string):
    lowercase = tf.strings.lower(input_string)
    return tf.strings.regex_replace(lowercase, "[%s]" % re.escape(strip_chars), "")


strip_chars = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
strip_chars = strip_chars.replace("<", "")
strip_chars = strip_chars.replace(">", "")

# List of words to include in the final vocabulary
final_vocab = list(filtered_words.keys())
print(f"Final Vocabulary Length: {len(final_vocab)}")

# Initialize the TextVectorization layer with the final vocabulary
vectorization = TextVectorization(
    max_tokens=VOCAB_SIZE,
    output_mode="int",
    output_sequence_length=SEQ_LENGTH,
    standardize=custom_standardization,
)
vectorization.set_vocabulary(final_vocab)

# Adapting the vectorization layer
# vectorization.adapt(text_data)

# Save the vocabulary to a JSON file after training
vocab = vectorization.get_vocabulary()
with open('/results/vocab.json', 'w', encoding='utf-8') as f:
    json.dump(vocab, f, ensure_ascii=False, indent=4)
print(f"Vocabulary saved with {len(vocab)} tokens.")

# Data augmentation for image data
image_augmentation = keras.Sequential(
    [
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1), # Reduced rotation for faster preprocessing
        layers.RandomContrast(0.2), # Lighter contrast adjustment
    ]
)


Final Vocabulary Length: 3136
Vocabulary saved with 3138 tokens.


In [233]:
covered = sum(word_freq[word] for word in final_vocab)
total = sum(word_freq.values())
print(f"Coverage: {covered / total * 100:.2f}%")

Coverage: 98.17%


A function to expand the dataset such that each image is paired with each caption separately.

In [234]:
def flatten_dataset(caption_mapping):
    """Flattens the dataset by creating separate (image, caption) pairs for each caption of an image.

    Args:
        caption_mapping: Dictionary mapping image names to a list of captions.

    Returns:
        images: List of image paths (repeated for each caption).
        captions: List of corresponding captions (each caption as a string).
    """
    images = []
    captions = []

    # For each image, repeat the image path for each caption and append to the lists
    for img_path, caption_list in caption_mapping.items():
        for caption in caption_list:
            images.append(img_path)
            captions.append(caption)

    return images, captions


The train and test datasets are created using TensorFlow Dataset.

In [235]:
from collections import defaultdict
from PIL import Image
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Constants
# Number of captions per image to use
NUM_CAPTIONS_PER_IMAGE = 2

# Suppress PNG warnings using PIL
def preprocess_image(image_path):
    """Preprocess image to suppress warnings."""
    try:
        with Image.open(image_path) as img:
            img = img.convert("RGB")  # Strip unnecessary metadata
            img.save(image_path, "PNG", icc_profile=None)
    except Exception as e:
        print(f"Error processing {image_path}: {e}")

# Decode, resize, and preprocess images
def decode_and_resize(img_path):
    img = tf.io.read_file(img_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, IMAGE_SIZE)
    img = tf.image.convert_image_dtype(img, tf.float32)
    return img


def process_input(img_path, caption):
    img = decode_and_resize(img_path)
    
    # Vectorize the caption
    captions_vectorized = vectorization(caption)
    
    return img, captions_vectorized

# Function to group captions by image
# Group captions by image
def group_captions_by_image(images, captions):
    grouped_data = defaultdict(list)
    for img, cap in zip(images, captions):
        grouped_data[img].append(cap)

    grouped_images = []
    grouped_captions = []

    for img, caps in grouped_data.items():
        grouped_images.append(img)
        grouped_captions.append(
            caps[:NUM_CAPTIONS_PER_IMAGE] + ["<pad>"] * (NUM_CAPTIONS_PER_IMAGE - len(caps))
        )

    print(f"Grouped {len(grouped_images)} images with captions.")
    return grouped_images, grouped_captions

# Process images and captions
def process_input(img_path, captions):
    img = decode_and_resize(img_path)
    captions_vectorized = vectorization(captions)
    captions_padded = pad_sequences(captions_vectorized, maxlen=SEQ_LENGTH, padding="post")
    return img, captions_padded

# Function for dataset creation
# Create a TensorFlow Dataset
def make_dataset(images, captions, num_captions_per_image):
    # Group captions by image
    grouped_images, grouped_captions = group_captions_by_image(images, captions)

    # Ensure all groups have a consistent number of captions (pad/truncate)
    padded_captions = [
        caps[:num_captions_per_image] + ["<pad>"] * (num_captions_per_image - len(caps))
        if len(caps) < num_captions_per_image else caps[:num_captions_per_image]
        for caps in grouped_captions
    ]

    # Create a TensorFlow Dataset
    dataset = tf.data.Dataset.from_tensor_slices((grouped_images, padded_captions))
    dataset = dataset.shuffle(BATCH_SIZE * 8)

    # Map images and captions to processed inputs
    dataset = dataset.map(
        lambda img, caps: (decode_and_resize(img), vectorization(caps)),
        num_parallel_calls=AUTOTUNE
    )

    for img, cap in dataset.take(1):
        print(f"Image and grouped caption shape: {img.shape, cap.shape} \n")

    # Batch and prefetch the dataset
    dataset = dataset.batch(BATCH_SIZE).prefetch(AUTOTUNE)

    return dataset


# def make_dataset(images, captions):
#     print([len(i) for i in images[:5]])
#     print([len(i) for i in captions[:5]])

# Dataset Preparation
# Flatten the training data and create datasets
train_images, train_captions = flatten_dataset(train_data)
valid_images, valid_captions = flatten_dataset(valid_data)

# Pass the list of images and the list of corresponding captions
# Create train and validation datasets
print("Creating training dataset... \n")
train_dataset = make_dataset(train_images, train_captions, NUM_CAPTIONS_PER_IMAGE)
train_dataset = train_dataset.prefetch(buffer_size=tf.data.AUTOTUNE)

print("Creating validation dataset... \n")
valid_dataset = make_dataset(valid_images, valid_captions, NUM_CAPTIONS_PER_IMAGE)

# Print the shapes of the datasets
# Debugging shapes
for img_batch, cap_batch in train_dataset.take(1):
    print(f"Training batch image shape: {img_batch.shape}, Caption shape: {cap_batch.shape}")
for img_batch, cap_batch in valid_dataset.take(1):
    print(f"Validation batch image shape: {img_batch.shape}, Caption shape: {cap_batch.shape}")

Creating training dataset... 

Grouped 7323 images with captions.
Image and grouped caption shape: (TensorShape([224, 224, 3]), TensorShape([2, 12])) 

Creating validation dataset... 

Grouped 1831 images with captions.
Image and grouped caption shape: (TensorShape([224, 224, 3]), TensorShape([2, 12])) 





Training batch image shape: (25, 224, 224, 3), Caption shape: (25, 2, 12)




Validation batch image shape: (25, 224, 224, 3), Caption shape: (25, 2, 12)


In [236]:
def compute_dataset_statistics(captions):
    """Print statistics for the caption dataset."""
    caption_lengths = [len(cap.split()) for cap in captions]
    print(f"Total Captions: {len(captions)}")
    print(f"Max Caption Length: {max(caption_lengths)}")
    print(f"Min Caption Length: {min(caption_lengths)}")
    print(f"Mean Caption Length: {sum(caption_lengths) / len(caption_lengths):.2f}")

# Compute statistics
print("Training Dataset Statistics:")
compute_dataset_statistics(train_captions)

print("Validation Dataset Statistics:")
compute_dataset_statistics(valid_captions)


Training Dataset Statistics:
Total Captions: 14646
Max Caption Length: 39
Min Caption Length: 4
Mean Caption Length: 10.49
Validation Dataset Statistics:
Total Captions: 3662
Max Caption Length: 40
Min Caption Length: 4
Mean Caption Length: 10.42


# Transformer model creation

The Transformer uses the EfficientNetB0 CNN.
The Encoder and Decoder block and Positional Embedding layer is seperately created by inheriting the Layer class.
The Transformer model is created by compiling the layers and inheriting the Model class.

In [237]:
def get_cnn_model():
    base_model = efficientnet.EfficientNetB0(
        input_shape=(*IMAGE_SIZE, 3), include_top=False, weights="imagenet",
    )
    # We freeze our feature extractor
    base_model.trainable = False
    base_model_out = base_model.output
    
    # Reduce the sequence length using a pooling operation
    # Usign GlobalAveragePooling2D to reduce the spatial dimensions
    base_model_out = layers.GlobalAveragePooling2D()(base_model_out)
    
    # Optionally, project the output to match the embedding size
    base_model_out = layers.Dense(EMBED_DIM)(base_model_out)
    
    cnn_model = keras.models.Model(base_model.input, base_model_out)
    
    # Print CNN Model Summary
    print("\nCNN Model Summary:")
    cnn_model.summary()
    return cnn_model


class TransformerEncoderBlock(layers.Layer):
    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention_1 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim, dropout=0.0
        )
        self.layernorm_1 = layers.LayerNormalization() # Potential error source
        self.layernorm_2 = layers.LayerNormalization()
        self.dense_1 = layers.Dense(embed_dim, activation="relu")

    def call(self, inputs, training, mask=None):
        # Input shape
        print(f"Encoder Input Shape: {inputs.shape}")

        print(f"Encoder Input Shape before LayerNorm: {inputs.shape}")
        inputs = self.layernorm_1(inputs)
        print(f"Encoder Input Shape after LayerNorm: {inputs.shape}")
        
        inputs = self.dense_1(inputs)

        attention_output_1 = self.attention_1(
            query=inputs,
            value=inputs,
            key=inputs,
            attention_mask=None,
            training=training,
        )
        
        out_1 = self.layernorm_2(inputs + attention_output_1)

        # Output shape
        print(f"Encoder Output Shape: {out_1.shape}")
        return out_1


class PositionalEmbedding(layers.Layer):
    def __init__(self, sequence_length, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.token_embeddings = layers.Embedding(
            input_dim=vocab_size, output_dim=embed_dim
        )
        self.position_embeddings = layers.Embedding(
            input_dim=sequence_length, output_dim=embed_dim
        )
        self.sequence_length = sequence_length
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.embed_scale = tf.math.sqrt(tf.cast(embed_dim, tf.float32))

    def call(self, inputs):
        print(f"Positional Embedding Input Shape: {inputs.shape}")
        
        length = tf.shape(inputs)[-1]
        positions = tf.range(start=0, limit=length, delta=1)
        embedded_tokens = self.token_embeddings(inputs)
        embedded_tokens = embedded_tokens * self.embed_scale
        embedded_positions = self.position_embeddings(positions)
        
        # Embeddings shape
        print(f"Positional Embedding Output Shape: {embedded_tokens.shape}")
        return embedded_tokens + embedded_positions

    def compute_mask(self, inputs, mask=None):
        return tf.math.not_equal(inputs, 0)


class TransformerDecoderBlock(layers.Layer):
    def __init__(self, embed_dim, ff_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.ff_dim = ff_dim
        self.num_heads = num_heads
        self.attention_1 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim, dropout=0.1
        )
        self.attention_2 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim, dropout=0.1
        )
        self.ffn_layer_1 = layers.Dense(ff_dim, activation="relu")
        self.ffn_layer_2 = layers.Dense(embed_dim)

        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        self.layernorm_3 = layers.LayerNormalization()

        self.embedding = PositionalEmbedding(
            embed_dim=EMBED_DIM, sequence_length=SEQ_LENGTH, vocab_size=VOCAB_SIZE
        )
        self.out = layers.Dense(VOCAB_SIZE, activation="softmax")

        self.dropout_1 = layers.Dropout(0.3)
        self.dropout_2 = layers.Dropout(0.5)
        self.supports_masking = True

    def call(self, inputs, encoder_outputs, training, mask=None):
        print(f"Decoder Input Shape: {inputs.shape}")
        
        inputs = self.embedding(inputs)
        causal_mask = self.get_causal_attention_mask(inputs)

        if mask is not None:
            padding_mask = tf.cast(mask[:, :, tf.newaxis], dtype=tf.int32)
            combined_mask = tf.cast(mask[:, tf.newaxis, :], dtype=tf.int32)
            combined_mask = tf.minimum(combined_mask, causal_mask)

        attention_output_1 = self.attention_1(
            query=inputs,
            value=inputs,
            key=inputs,
            attention_mask=combined_mask,
            training=training,
        )
        out_1 = self.layernorm_1(inputs + attention_output_1)

        attention_output_2 = self.attention_2(
            query=out_1,
            value=encoder_outputs,
            key=encoder_outputs,
            attention_mask=padding_mask,
            training=training,
        )
        out_2 = self.layernorm_2(out_1 + attention_output_2)

        ffn_out = self.ffn_layer_1(out_2)
        ffn_out = self.dropout_1(ffn_out, training=training)
        ffn_out = self.ffn_layer_2(ffn_out)

        ffn_out = self.layernorm_3(ffn_out + out_2, training=training)
        ffn_out = self.dropout_2(ffn_out, training=training)
        preds = self.out(ffn_out)
        
        print(f"Decoder Output Shape: {preds.shape}")
        return preds

    def get_causal_attention_mask(self, inputs):
        input_shape = tf.shape(inputs)
        batch_size, sequence_length = input_shape[0], input_shape[1]
        i = tf.range(sequence_length)[:, tf.newaxis]
        j = tf.range(sequence_length)
        mask = tf.cast(i >= j, dtype="int32")
        mask = tf.reshape(mask, (1, input_shape[1], input_shape[1]))
        mult = tf.concat(
            [tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)],
            axis=0,
        )
        return tf.tile(mask, mult)


class ImageCaptioningModel(keras.Model):
    def __init__(
        self, cnn_model, encoder, decoder, num_captions_per_image=2, image_aug=None,
    ):
        super().__init__()
        self.cnn_model = cnn_model
        self.encoder = encoder
        self.decoder = decoder
        self.loss_tracker = keras.metrics.Mean(name="loss")
        self.acc_tracker = keras.metrics.Mean(name="accuracy")
        self.num_captions_per_image = num_captions_per_image
        self.image_aug = image_aug

    def calculate_loss(self, y_true, y_pred, mask):
        loss = self.loss(y_true, y_pred)
        mask = tf.cast(mask, dtype=loss.dtype)
        loss *= mask
        return tf.reduce_sum(loss) / tf.reduce_sum(mask)

    def calculate_accuracy(self, y_true, y_pred, mask):
        accuracy = tf.equal(y_true, tf.argmax(y_pred, axis=2))
        accuracy = tf.math.logical_and(mask, accuracy)
        accuracy = tf.cast(accuracy, dtype=tf.float32)
        mask = tf.cast(mask, dtype=tf.float32)
        return tf.reduce_sum(accuracy) / tf.reduce_sum(mask)

    def _compute_caption_loss_and_acc(self, img_embed, batch_seq, training=True):
        # print(f"Image Embedding Input Shape before passing to Encoder: {img_embed.shape}")
        
        # batch_seq = tf.expand_dims(batch_seq, axis=1)
        print(f"Batch Sequence Input Shape before slicing: {batch_seq.shape}")
        
        encoder_out = self.encoder(img_embed, training=training)
        batch_seq_inp = batch_seq[:, :-1] # Input sequence (without the last token)

        # print(f"Batch Sequence Input Shape before target sequence: {batch_seq_inp.shape}")
        
        batch_seq_true = batch_seq[:, 1:] # Target sequence (without the first token)
        mask = tf.math.not_equal(batch_seq_true, 0)
        
        print(f"Batch Sequence Input Shape: {batch_seq_inp.shape}")
        print(f"Batch Sequence True Shape: {batch_seq_true.shape}")
        
        batch_seq_pred = self.decoder(
            batch_seq_inp, encoder_out, training=training, mask=mask
        )

        print(f"Batch Sequence Predicted Shape: {batch_seq_pred.shape}")
        
        loss = self.calculate_loss(batch_seq_true, batch_seq_pred, mask)
        acc = self.calculate_accuracy(batch_seq_true, batch_seq_pred, mask)
        return loss, acc

    def train_step(self, batch_data):
        batch_img, batch_seq = batch_data

        # batch_seq = tf.expand_dims(batch_seq, axis=1)

        # print(f"Training Image Batch Shape before passing to CNN: {batch_img.shape}")
        total_loss = 0
        total_acc = 0
    
        if self.image_aug:
            batch_img = self.image_aug(batch_img)

        print(f"Training Image Batch Shape: {batch_img.shape}")
        print(f"Training Sequence Batch Shape: {batch_seq.shape}")
        
        # 1. Get image embeddings from CNN
        img_embed = self.cnn_model(batch_img)
        print(f"Image Embeddings Shape: {img_embed.shape}")

        # 2. Reshape CNN output to (batch_size, 1, embedding_dim)
        img_embed = tf.expand_dims(img_embed, axis=1)  # It should be (None, 1, 1024)

        print(f"Reshaped Image Embeddings for Encoder: {img_embed.shape}")
        
        # 3. Make sure batch_seq has 3 dimensions
        if batch_seq.shape.ndims == 2:
            # Reshape the sequence to have a third dimension (e.g., 1 caption per image)
            batch_seq = tf.expand_dims(batch_seq, axis=1)
        
        print(f"Updated Sequence Shape: {batch_seq.shape}")

        # 4. Accumulate loss and accuracy for each caption
        with tf.GradientTape() as tape:
            # Loop through each caption (batch_seq should be (batch_size, num_captions, sequence_length))
            num_captions_per_image = batch_seq.shape[1] # Extract the num_captions dimension
            
            for i in range(self.num_captions_per_image):
                loss, acc = self._compute_caption_loss_and_acc(
                    img_embed, batch_seq[:, i, :], training=True
                )
                total_loss += loss
                total_acc += acc

            # 5. Compute the mean loss and accuracy
            avg_loss = total_loss / tf.cast(self.num_captions_per_image, dtype=tf.float32)
            avg_acc = total_acc / tf.cast(self.num_captions_per_image, dtype=tf.float32)

        # Backpropagation
        # 6. Get the list of all the trainable weights
        train_vars = self.encoder.trainable_variables + self.decoder.trainable_variables
        
        # 7. Get the gradients (from the accumulated loss)
        grads = tape.gradient(avg_loss, train_vars)
    
        # 8. Update the trainable weights
        self.optimizer.apply_gradients(zip(grads, train_vars))
    
        # 11. Update the trackers
        self.loss_tracker.update_state(avg_loss)
        self.acc_tracker.update_state(avg_acc)
    
        # 12. Return the loss and accuracy values
        return {"loss": self.loss_tracker.result(), "acc": self.acc_tracker.result()}

    def test_step(self, batch_data):
        batch_img, batch_seq = batch_data
        print(f"Validation Image Batch Shape: {batch_img.shape}")
        print(f"Validation Sequence Batch Shape: {batch_seq.shape}")

        # batch_seq = tf.expand_dims(batch_seq, axis=1)

        batch_loss = 0
        batch_acc = 0

        # 1. Get image embeddings
        img_embed = self.cnn_model(batch_img)
        print(f"Image Embeddings Shape: {img_embed.shape}")
        img_embed = tf.expand_dims(img_embed, axis=1)
        print(f"Reshaped Image Embeddings Shape: {img_embed.shape}")

        # 2. Pass each of the captions one by one to the decoder
        # along with the encoder outputs and compute the loss as well as accuracy
        # for each caption.
        # Loop through captions
        for i in range(self.num_captions_per_image):
            batch_seq_inp = batch_seq[:, i, :-1]
            batch_seq_true = batch_seq[:, i, 1:]
            print(f"Validation Sequence Input Shape: {batch_seq_inp.shape}")
            print(f"Validation Sequence True Shape: {batch_seq_true.shape}")
        
            loss, acc = self._compute_caption_loss_and_acc(
                img_embed, batch_seq[:, i, :], training=False
            )

            # 3. Update batch loss and batch accuracy
            batch_loss += loss
            batch_acc += acc

        batch_acc /= float(self.num_captions_per_image)

        # 4. Update the trackers
        self.loss_tracker.update_state(batch_loss)
        self.acc_tracker.update_state(batch_acc)

        # 5. Return the loss and accuracy values
        return {"loss": self.loss_tracker.result(), "acc": self.acc_tracker.result()}

    @property
    def metrics(self):
        # We need to list our metrics here so the `reset_states()` can be
        # called automatically.
        return [self.loss_tracker, self.acc_tracker]


cnn_model = get_cnn_model()
encoder = TransformerEncoderBlock(embed_dim=EMBED_DIM, dense_dim=FF_DIM, num_heads=1)
decoder = TransformerDecoderBlock(embed_dim=EMBED_DIM, ff_dim=FF_DIM, num_heads=2)
caption_model = ImageCaptioningModel(
    cnn_model=cnn_model, encoder=encoder, decoder=decoder, image_aug=image_augmentation,
)


CNN Model Summary:
Model: "model_7"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_8 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
rescaling_7 (Rescaling)         (None, 224, 224, 3)  0           input_8[0][0]                    
__________________________________________________________________________________________________
normalization_7 (Normalization) (None, 224, 224, 3)  7           rescaling_7[0][0]                
__________________________________________________________________________________________________
stem_conv_pad (ZeroPadding2D)   (None, 225, 225, 3)  0           normalization_7[0][0]            
________________________________________________________________________

The loss function and early stopping is defined. The model is compiled with the same.

In [238]:
# Define the loss function
cross_entropy = keras.losses.SparseCategoricalCrossentropy(
    from_logits=False, reduction="none"
)

# EarlyStopping criteria
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=3,
    restore_best_weights=True
)


# Learning Rate Scheduler for the optimizer
class LRSchedule(keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, post_warmup_learning_rate, warmup_steps):
        super().__init__()
        self.post_warmup_learning_rate = post_warmup_learning_rate
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        global_step = tf.cast(step, tf.float32)
        warmup_steps = tf.cast(self.warmup_steps, tf.float32)
        warmup_progress = global_step / warmup_steps
        warmup_learning_rate = self.post_warmup_learning_rate * warmup_progress
        return tf.cond(
            global_step < warmup_steps,
            lambda: warmup_learning_rate,
            lambda: self.post_warmup_learning_rate,
        )


# Create a learning rate schedule
num_train_steps = len(train_dataset) * EPOCHS
num_warmup_steps = num_train_steps // 15
lr_schedule = LRSchedule(post_warmup_learning_rate=1e-4, warmup_steps=num_warmup_steps)

# Compile the model
caption_model.compile(optimizer=keras.optimizers.Adam(lr_schedule), loss=cross_entropy)

In [239]:
# Take a single batch from the validation dataset
for img_batch, cap_batch in valid_dataset.take(1):
    print(f"Validation batch image shape: {img_batch.shape}, Caption shape: {cap_batch.shape}")
    caption_model.test_step((img_batch, cap_batch))


Validation batch image shape: (25, 224, 224, 3), Caption shape: (25, 2, 12)
Validation Image Batch Shape: (25, 224, 224, 3)
Validation Sequence Batch Shape: (25, 2, 12)
Image Embeddings Shape: (25, 256)
Reshaped Image Embeddings Shape: (25, 1, 256)
Validation Sequence Input Shape: (25, 11)
Validation Sequence True Shape: (25, 11)
Batch Sequence Input Shape before slicing: (25, 12)
Encoder Input Shape: (25, 1, 256)
Encoder Input Shape before LayerNorm: (25, 1, 256)
Encoder Input Shape after LayerNorm: (25, 1, 256)




Encoder Output Shape: (25, 1, 256)
Batch Sequence Input Shape: (25, 11)
Batch Sequence True Shape: (25, 11)
Decoder Input Shape: (25, 11)
Positional Embedding Input Shape: (25, 11)
Positional Embedding Output Shape: (25, 11, 256)
Decoder Output Shape: (25, 11, 3138)
Batch Sequence Predicted Shape: (25, 11, 3138)
Validation Sequence Input Shape: (25, 11)
Validation Sequence True Shape: (25, 11)
Batch Sequence Input Shape before slicing: (25, 12)
Encoder Input Shape: (25, 1, 256)
Encoder Input Shape before LayerNorm: (25, 1, 256)
Encoder Input Shape after LayerNorm: (25, 1, 256)
Encoder Output Shape: (25, 1, 256)
Batch Sequence Input Shape: (25, 11)
Batch Sequence True Shape: (25, 11)
Decoder Input Shape: (25, 11)
Positional Embedding Input Shape: (25, 11)
Positional Embedding Output Shape: (25, 11, 256)
Decoder Output Shape: (25, 11, 3138)
Batch Sequence Predicted Shape: (25, 11, 3138)


# Model training and testing

Sets the version and checks if the model has been previously trained or being trained for the first time. If saved weights are found, they're loaded.

In [240]:
mdx = '231005'  # Sets the version
tmpx = f'/results/Model_weights/{mdx}/Temp/'

# Check if the directory exists
if not os.path.exists(tmpx):
    print(f"Directory {tmpx} does not exist. Creating the directory.")
    os.makedirs(tmpx)  # Create the directory if it doesn't exist

# Now check for the files
try:
    weight_path = f'{tmpx}imgcap_{mdx}'
    fls = os.listdir(tmpx)

    # Look for specific weight files (like .index or .data-00000-of-00001)
    checkpoint_files = [f for f in fls if "imgcap_" in f]
    
    if len(checkpoint_files) > 0:
        print("Found saved weights, loading them now...")
        caption_model.load_weights(weight_path)
        print("Saved weights loaded successfully")
    else:
        print("No saved weights found, training from scratch")
except FileNotFoundError as e:
    print(f"Error: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")


Directory /results/Model_weights/231005/Temp/ does not exist. Creating the directory.
No saved weights found, training from scratch


Lookup dictionary created and output sequence length is set.

In [241]:
# Load the vocabulary during inference
with open('/results/vocab.json', 'r', encoding='utf-8') as f:
    vocab = json.load(f)

# Recreate the vectorization object and set its vocabulary
vectorization = TextVectorization(
    max_tokens=VOCAB_SIZE,
    output_mode="int",
    output_sequence_length=SEQ_LENGTH,
    standardize=custom_standardization,
)

# Set the vocabulary during inference
vectorization.set_vocabulary(vocab)

print(f"Vocabulary loaded with {len(vocab)} tokens.")

index_lookup = dict(zip(range(len(vocab)), vocab))
max_decoded_sentence_length = SEQ_LENGTH - 1
valid_images = list(valid_data.keys())

Vocabulary loaded with 3138 tokens.


The function reads an image from the given path. It uses the image to generate an caption. The same are displayed.

In [242]:
# Handle invalid token indices
def generate_caption(dt, ix):
    # Select a random image from the validation dataset
    sample_img = f'{dt}images/{ix}'

    # Read the image from the disk
    sample_img = decode_and_resize(sample_img)
    img = sample_img.numpy().clip(0, 255).astype(np.uint8)
    plt.axis('off')
    plt.imshow(img)
    plt.show()
    
    # Pass the image to the CNN
    img = tf.expand_dims(sample_img, 0)
    img = caption_model.cnn_model(img)

    # Expand dimensions to make it compatible with the encoder
    img = tf.expand_dims(img, 1)  # Adding sequence dimension, shape becomes (batch_size, 1, embed_dim)

    # Pass the image features to the Transformer encoder
    encoded_img = caption_model.encoder(img, training=False)

    # Generate the caption using the Transformer decoder
    decoded_caption = "<start> "
    for i in range(max_decoded_sentence_length):
        # Ensure decoded_caption is passed as a list of strings
        tokenized_caption = vectorization(tf.constant([decoded_caption]))[:, :-1]
        
        # Create mask for the tokenized caption
        mask = tf.math.not_equal(tokenized_caption, 0)

        predictions = caption_model.decoder(
            tokenized_caption, encoded_img, training=False, mask=mask
        )

        # Get the predicted token
        sampled_token_index = np.argmax(predictions[0, i, :])

        # Check if sampled_token_index is in the vocabulary range
        if sampled_token_index >= len(vocab):
            print(f"Warning: Token index {sampled_token_index} out of range")
            continue  # Skip to the next iteration if the token is out of range

        sampled_token = index_lookup[sampled_token_index]

        # Handle noisy or unknown tokens
        if sampled_token in ("[UNK]", ""):
            print(f"Warning: Encountered noisy token '{sampled_token}'. Skipping.")
            continue  # Skip this token

        if sampled_token == "<end>":
            break

        decoded_caption += " " + sampled_token

    # Clean up the decoded caption
    decoded_caption = (
        decoded_caption.replace("<start> ", "")
        .replace(" <end>", "")
        .replace("[UNK]", "")
        .strip()
    )
    print("\nPredicted Caption: ", decoded_caption)
    print()


The model is trained.

In [243]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Define checkpoint callback
checkpoint_cb = ModelCheckpoint(
    filepath=f'{tmpx}imgcap_{mdx}', 
    save_weights_only=True, 
    save_best_only=True
)

try:
    # Fit the model
    caption_model.fit(
        train_dataset,
        epochs=EPOCHS,
        validation_data=valid_dataset,
        callbacks=[early_stopping, checkpoint_cb],
    )
except ValueError as e:
    print(f"ValueError during model fitting: {e}")

Epoch 1/15
Training Image Batch Shape: (None, 224, 224, 3)
Training Sequence Batch Shape: (None, None, 12)
Image Embeddings Shape: (None, 256)
Reshaped Image Embeddings for Encoder: (None, 1, 256)
Updated Sequence Shape: (None, None, 12)
Batch Sequence Input Shape before slicing: (None, 12)
Encoder Input Shape: (None, 1, 256)
Encoder Input Shape before LayerNorm: (None, 1, 256)
Encoder Input Shape after LayerNorm: (None, 1, 256)
Encoder Output Shape: (None, 1, 256)
Batch Sequence Input Shape: (None, 11)
Batch Sequence True Shape: (None, 11)
Decoder Input Shape: (None, 11)
Positional Embedding Input Shape: (None, 11)
Positional Embedding Output Shape: (None, 11, 256)
Decoder Output Shape: (None, 11, 3138)
Batch Sequence Predicted Shape: (None, 11, 3138)
Batch Sequence Input Shape before slicing: (None, 12)
Encoder Input Shape: (None, 1, 256)
Encoder Input Shape before LayerNorm: (None, 1, 256)
Encoder Input Shape after LayerNorm: (None, 1, 256)
Encoder Output Shape: (None, 1, 256)
Batch



  6/293 [..............................] - ETA: 53s - loss: 8.2030 - acc: 1.7869e-04 



 11/293 [>.............................] - ETA: 46s - loss: 8.2046 - acc: 3.3606e-04



 14/293 [>.............................] - ETA: 44s - loss: 8.2123 - acc: 4.1295e-04



 15/293 [>.............................] - ETA: 43s - loss: 8.2142 - acc: 4.2882e-04



 17/293 [>.............................] - ETA: 43s - loss: 8.2119 - acc: 4.4807e-04



 21/293 [=>............................] - ETA: 42s - loss: 8.2056 - acc: 4.8423e-04



 23/293 [=>............................] - ETA: 41s - loss: 8.2052 - acc: 4.9006e-04



 25/293 [=>............................] - ETA: 41s - loss: 8.2054 - acc: 5.0023e-04



 28/293 [=>............................] - ETA: 40s - loss: 8.2038 - acc: 5.0667e-04



 31/293 [==>...........................] - ETA: 39s - loss: 8.2013 - acc: 5.0643e-04



 36/293 [==>...........................] - ETA: 38s - loss: 8.1943 - acc: 4.9795e-04



 38/293 [==>...........................] - ETA: 38s - loss: 8.1904 - acc: 4.9488e-04



 40/293 [===>..........................] - ETA: 37s - loss: 8.1888 - acc: 4.9279e-04



 42/293 [===>..........................] - ETA: 37s - loss: 8.1857 - acc: 4.8986e-04



 44/293 [===>..........................] - ETA: 37s - loss: 8.1831 - acc: 4.8629e-04



 47/293 [===>..........................] - ETA: 36s - loss: 8.1777 - acc: 4.8009e-04



 49/293 [====>.........................] - ETA: 36s - loss: 8.1749 - acc: 4.7555e-04



 52/293 [====>.........................] - ETA: 35s - loss: 8.1688 - acc: 4.7083e-04



 54/293 [====>.........................] - ETA: 35s - loss: 8.1640 - acc: 4.6730e-04



 56/293 [====>.........................] - ETA: 35s - loss: 8.1603 - acc: 4.6489e-04



 60/293 [=====>........................] - ETA: 34s - loss: 8.1534 - acc: 4.5920e-04



 62/293 [=====>........................] - ETA: 34s - loss: 8.1488 - acc: 4.5667e-04



 64/293 [=====>........................] - ETA: 33s - loss: 8.1422 - acc: 4.5795e-04



 66/293 [=====>........................] - ETA: 33s - loss: 8.1372 - acc: 4.5979e-04



 67/293 [=====>........................] - ETA: 33s - loss: 8.1349 - acc: 4.6102e-04



















































































































































































































































































Validation Image Batch Shape: (None, 224, 224, 3)
Validation Sequence Batch Shape: (None, None, 12)
Image Embeddings Shape: (None, 256)
Reshaped Image Embeddings Shape: (None, 1, 256)
Validation Sequence Input Shape: (None, 11)
Validation Sequence True Shape: (None, 11)
Batch Sequence Input Shape before slicing: (None, 12)
Encoder Input Shape: (None, 1, 256)
Encoder Input Shape before LayerNorm: (None, 1, 256)
Encoder Input Shape after LayerNorm: (None, 1, 256)
Encoder Output Shape: (None, 1, 256)
Batch Sequence Input Shape: (None, 11)
Batch Sequence True Shape: (None, 11)
Decoder Input Shape: (None, 11)
Positional Embedding Input Shape: (None, 11)
Positional Embedding Output Shape: (None, 11, 256)
Decoder Output Shape: (None, 11, 3138)
Batch Sequence Predicted Shape: (None, 11, 3138)
Validation Sequence Input Shape: (None, 11)
Validation Sequence True Shape: (None, 11)
Batch Sequence Input Shape before slicing: (None, 12)
Encoder Input Shape: (None, 1, 256)
Encoder Input Shape before 



Epoch 2/15
  5/293 [..............................] - ETA: 47s - loss: 5.2664 - acc: 0.1581 



  7/293 [..............................] - ETA: 46s - loss: 5.2376 - acc: 0.1606



  9/293 [..............................] - ETA: 46s - loss: 5.2047 - acc: 0.1628



 11/293 [>.............................] - ETA: 46s - loss: 5.1697 - acc: 0.1644



 13/293 [>.............................] - ETA: 45s - loss: 5.1261 - acc: 0.1663



 16/293 [>.............................] - ETA: 44s - loss: 5.1340 - acc: 0.1685



 17/293 [>.............................] - ETA: 44s - loss: 5.1203 - acc: 0.1692



 18/293 [>.............................] - ETA: 43s - loss: 5.1104 - acc: 0.1698



 20/293 [=>............................] - ETA: 43s - loss: 5.1164 - acc: 0.1709



 22/293 [=>............................] - ETA: 42s - loss: 5.0931 - acc: 0.1720



 27/293 [=>............................] - ETA: 41s - loss: 5.0766 - acc: 0.1742



 29/293 [=>............................] - ETA: 41s - loss: 5.0809 - acc: 0.1749



 32/293 [==>...........................] - ETA: 40s - loss: 5.0824 - acc: 0.1757



 36/293 [==>...........................] - ETA: 38s - loss: 5.0853 - acc: 0.1766



 38/293 [==>...........................] - ETA: 38s - loss: 5.0841 - acc: 0.1770



 41/293 [===>..........................] - ETA: 37s - loss: 5.0860 - acc: 0.1776



 42/293 [===>..........................] - ETA: 37s - loss: 5.0880 - acc: 0.1777



 44/293 [===>..........................] - ETA: 37s - loss: 5.0870 - acc: 0.1781



 45/293 [===>..........................] - ETA: 37s - loss: 5.0826 - acc: 0.1782



 48/293 [===>..........................] - ETA: 36s - loss: 5.0738 - acc: 0.1788



 51/293 [====>.........................] - ETA: 36s - loss: 5.0663 - acc: 0.1793



 54/293 [====>.........................] - ETA: 35s - loss: 5.0551 - acc: 0.1798



 55/293 [====>.........................] - ETA: 35s - loss: 5.0535 - acc: 0.1800



 60/293 [=====>........................] - ETA: 34s - loss: 5.0436 - acc: 0.1809



 61/293 [=====>........................] - ETA: 34s - loss: 5.0389 - acc: 0.1811



 63/293 [=====>........................] - ETA: 34s - loss: 5.0378 - acc: 0.1814



 65/293 [=====>........................] - ETA: 34s - loss: 5.0361 - acc: 0.1818



 67/293 [=====>........................] - ETA: 33s - loss: 5.0359 - acc: 0.1821



























































































































































































































































































Epoch 3/15




  2/293 [..............................] - ETA: 48s - loss: 4.3372 - acc: 0.2841 



  5/293 [..............................] - ETA: 49s - loss: 4.3616 - acc: 0.2834



  8/293 [..............................] - ETA: 45s - loss: 4.3699 - acc: 0.2807



 13/293 [>.............................] - ETA: 43s - loss: 4.3183 - acc: 0.2802



 15/293 [>.............................] - ETA: 42s - loss: 4.3048 - acc: 0.2805



 16/293 [>.............................] - ETA: 42s - loss: 4.2945 - acc: 0.2805



 18/293 [>.............................] - ETA: 41s - loss: 4.2822 - acc: 0.2806



 20/293 [=>............................] - ETA: 40s - loss: 4.2921 - acc: 0.2809



 22/293 [=>............................] - ETA: 40s - loss: 4.2884 - acc: 0.2812



 24/293 [=>............................] - ETA: 39s - loss: 4.2966 - acc: 0.2814



 26/293 [=>............................] - ETA: 39s - loss: 4.2919 - acc: 0.2816



 29/293 [=>............................] - ETA: 39s - loss: 4.2910 - acc: 0.2819



 32/293 [==>...........................] - ETA: 38s - loss: 4.2814 - acc: 0.2822



 35/293 [==>...........................] - ETA: 38s - loss: 4.2850 - acc: 0.2825



 38/293 [==>...........................] - ETA: 37s - loss: 4.2899 - acc: 0.2827



 43/293 [===>..........................] - ETA: 36s - loss: 4.2672 - acc: 0.2832



 45/293 [===>..........................] - ETA: 36s - loss: 4.2621 - acc: 0.2834



 47/293 [===>..........................] - ETA: 36s - loss: 4.2679 - acc: 0.2835



 49/293 [====>.........................] - ETA: 35s - loss: 4.2694 - acc: 0.2837



 50/293 [====>.........................] - ETA: 35s - loss: 4.2721 - acc: 0.2837



 52/293 [====>.........................] - ETA: 35s - loss: 4.2705 - acc: 0.2838



 54/293 [====>.........................] - ETA: 35s - loss: 4.2693 - acc: 0.2839



 57/293 [====>.........................] - ETA: 34s - loss: 4.2645 - acc: 0.2841



 58/293 [====>.........................] - ETA: 34s - loss: 4.2646 - acc: 0.2841



 60/293 [=====>........................] - ETA: 34s - loss: 4.2621 - acc: 0.2843



 64/293 [=====>........................] - ETA: 33s - loss: 4.2535 - acc: 0.2845



 67/293 [=====>........................] - ETA: 32s - loss: 4.2518 - acc: 0.2847































































































































































































































































































Epoch 4/15
  2/293 [..............................] - ETA: 49s - loss: 3.9032 - acc: 0.3183 



  7/293 [..............................] - ETA: 55s - loss: 3.9377 - acc: 0.3168 



  8/293 [..............................] - ETA: 54s - loss: 3.9203 - acc: 0.3163



 10/293 [>.............................] - ETA: 51s - loss: 3.9312 - acc: 0.3156



 12/293 [>.............................] - ETA: 50s - loss: 3.9314 - acc: 0.3154



 14/293 [>.............................] - ETA: 48s - loss: 3.9217 - acc: 0.3152



 16/293 [>.............................] - ETA: 47s - loss: 3.9100 - acc: 0.3149



 21/293 [=>............................] - ETA: 44s - loss: 3.8970 - acc: 0.3146



 23/293 [=>............................] - ETA: 43s - loss: 3.8941 - acc: 0.3147



 26/293 [=>............................] - ETA: 42s - loss: 3.9020 - acc: 0.3146



 28/293 [=>............................] - ETA: 42s - loss: 3.9026 - acc: 0.3146



 30/293 [==>...........................] - ETA: 41s - loss: 3.8929 - acc: 0.3147



 32/293 [==>...........................] - ETA: 40s - loss: 3.8837 - acc: 0.3149



 35/293 [==>...........................] - ETA: 39s - loss: 3.8983 - acc: 0.3150



 37/293 [==>...........................] - ETA: 39s - loss: 3.8941 - acc: 0.3150



 41/293 [===>..........................] - ETA: 38s - loss: 3.8989 - acc: 0.3151



 43/293 [===>..........................] - ETA: 38s - loss: 3.8940 - acc: 0.3152



 47/293 [===>..........................] - ETA: 37s - loss: 3.8951 - acc: 0.3153



 50/293 [====>.........................] - ETA: 36s - loss: 3.8881 - acc: 0.3154



 52/293 [====>.........................] - ETA: 36s - loss: 3.8929 - acc: 0.3154



 54/293 [====>.........................] - ETA: 36s - loss: 3.8846 - acc: 0.3155



 56/293 [====>.........................] - ETA: 35s - loss: 3.8858 - acc: 0.3156



 58/293 [====>.........................] - ETA: 35s - loss: 3.8834 - acc: 0.3157



 59/293 [=====>........................] - ETA: 35s - loss: 3.8821 - acc: 0.3157



 62/293 [=====>........................] - ETA: 34s - loss: 3.8798 - acc: 0.3158



 64/293 [=====>........................] - ETA: 34s - loss: 3.8792 - acc: 0.3159



 66/293 [=====>........................] - ETA: 33s - loss: 3.8789 - acc: 0.3160























































































































































































































































































Epoch 5/15
  1/293 [..............................] - ETA: 1:25 - loss: 3.8547 - acc: 0.3284



  2/293 [..............................] - ETA: 46s - loss: 3.7739 - acc: 0.3296 



  6/293 [..............................] - ETA: 58s - loss: 3.7813 - acc: 0.3329 



  9/293 [..............................] - ETA: 53s - loss: 3.7471 - acc: 0.3322



 11/293 [>.............................] - ETA: 52s - loss: 3.7464 - acc: 0.3329



 14/293 [>.............................] - ETA: 52s - loss: 3.7351 - acc: 0.3334



 16/293 [>.............................] - ETA: 50s - loss: 3.7323 - acc: 0.3337



 20/293 [=>............................] - ETA: 48s - loss: 3.7103 - acc: 0.3344



 22/293 [=>............................] - ETA: 47s - loss: 3.7000 - acc: 0.3347



 23/293 [=>............................] - ETA: 47s - loss: 3.6896 - acc: 0.3349



 26/293 [=>............................] - ETA: 45s - loss: 3.6822 - acc: 0.3352



 28/293 [=>............................] - ETA: 45s - loss: 3.6823 - acc: 0.3354



 31/293 [==>...........................] - ETA: 44s - loss: 3.6800 - acc: 0.3357



 34/293 [==>...........................] - ETA: 43s - loss: 3.6899 - acc: 0.3360



 35/293 [==>...........................] - ETA: 42s - loss: 3.6899 - acc: 0.3360



 37/293 [==>...........................] - ETA: 42s - loss: 3.6886 - acc: 0.3362



 40/293 [===>..........................] - ETA: 41s - loss: 3.6840 - acc: 0.3363



 44/293 [===>..........................] - ETA: 40s - loss: 3.6798 - acc: 0.3365



 46/293 [===>..........................] - ETA: 40s - loss: 3.6833 - acc: 0.3366



 48/293 [===>..........................] - ETA: 39s - loss: 3.6845 - acc: 0.3366



 50/293 [====>.........................] - ETA: 39s - loss: 3.6828 - acc: 0.3366



 54/293 [====>.........................] - ETA: 38s - loss: 3.6884 - acc: 0.3367



 56/293 [====>.........................] - ETA: 38s - loss: 3.6827 - acc: 0.3367



 57/293 [====>.........................] - ETA: 37s - loss: 3.6823 - acc: 0.3368



 59/293 [=====>........................] - ETA: 37s - loss: 3.6775 - acc: 0.3368



 63/293 [=====>........................] - ETA: 36s - loss: 3.6759 - acc: 0.3370



 64/293 [=====>........................] - ETA: 36s - loss: 3.6757 - acc: 0.3370



 66/293 [=====>........................] - ETA: 36s - loss: 3.6751 - acc: 0.3370



 68/293 [=====>........................] - ETA: 35s - loss: 3.6721 - acc: 0.3371















































































































































































































































































































Epoch 6/15




  3/293 [..............................] - ETA: 48s - loss: 3.6153 - acc: 0.3652 



  5/293 [..............................] - ETA: 45s - loss: 3.6434 - acc: 0.3583



  9/293 [..............................] - ETA: 43s - loss: 3.6141 - acc: 0.3514



 11/293 [>.............................] - ETA: 42s - loss: 3.5954 - acc: 0.3503



 14/293 [>.............................] - ETA: 41s - loss: 3.5696 - acc: 0.3490



 15/293 [>.............................] - ETA: 41s - loss: 3.5507 - acc: 0.3489



 17/293 [>.............................] - ETA: 41s - loss: 3.5318 - acc: 0.3492



 19/293 [>.............................] - ETA: 40s - loss: 3.5501 - acc: 0.3492



 21/293 [=>............................] - ETA: 40s - loss: 3.5552 - acc: 0.3487



 24/293 [=>............................] - ETA: 39s - loss: 3.5542 - acc: 0.3484



 27/293 [=>............................] - ETA: 38s - loss: 3.5304 - acc: 0.3484



 29/293 [=>............................] - ETA: 38s - loss: 3.5304 - acc: 0.3485



 31/293 [==>...........................] - ETA: 38s - loss: 3.5299 - acc: 0.3486



 32/293 [==>...........................] - ETA: 37s - loss: 3.5178 - acc: 0.3487



 34/293 [==>...........................] - ETA: 37s - loss: 3.5242 - acc: 0.3490



 36/293 [==>...........................] - ETA: 37s - loss: 3.5314 - acc: 0.3491



 38/293 [==>...........................] - ETA: 37s - loss: 3.5200 - acc: 0.3493



 43/293 [===>..........................] - ETA: 36s - loss: 3.5165 - acc: 0.3497



 45/293 [===>..........................] - ETA: 36s - loss: 3.5164 - acc: 0.3498



 46/293 [===>..........................] - ETA: 36s - loss: 3.5141 - acc: 0.3499



 48/293 [===>..........................] - ETA: 36s - loss: 3.5133 - acc: 0.3501



 50/293 [====>.........................] - ETA: 35s - loss: 3.5117 - acc: 0.3502



 52/293 [====>.........................] - ETA: 35s - loss: 3.5114 - acc: 0.3504



 54/293 [====>.........................] - ETA: 35s - loss: 3.5121 - acc: 0.3505



 55/293 [====>.........................] - ETA: 35s - loss: 3.5131 - acc: 0.3506



 61/293 [=====>........................] - ETA: 34s - loss: 3.5081 - acc: 0.3509



 63/293 [=====>........................] - ETA: 33s - loss: 3.5119 - acc: 0.3510



 65/293 [=====>........................] - ETA: 33s - loss: 3.5120 - acc: 0.3511



 68/293 [=====>........................] - ETA: 33s - loss: 3.5124 - acc: 0.3512











































































































































































































































































































Epoch 7/15
  2/293 [..............................] - ETA: 46s - loss: 3.3484 - acc: 0.3512 



  3/293 [..............................] - ETA: 48s - loss: 3.3784 - acc: 0.3514



  5/293 [..............................] - ETA: 57s - loss: 3.3936 - acc: 0.3523 



 10/293 [>.............................] - ETA: 50s - loss: 3.3690 - acc: 0.3551



 13/293 [>.............................] - ETA: 49s - loss: 3.3919 - acc: 0.3567



 15/293 [>.............................] - ETA: 48s - loss: 3.3841 - acc: 0.3573



 16/293 [>.............................] - ETA: 47s - loss: 3.3707 - acc: 0.3576



 18/293 [>.............................] - ETA: 47s - loss: 3.3803 - acc: 0.3581



 22/293 [=>............................] - ETA: 47s - loss: 3.3756 - acc: 0.3589



 23/293 [=>............................] - ETA: 46s - loss: 3.3872 - acc: 0.3591



 24/293 [=>............................] - ETA: 46s - loss: 3.3830 - acc: 0.3593



 26/293 [=>............................] - ETA: 45s - loss: 3.3932 - acc: 0.3595



 28/293 [=>............................] - ETA: 45s - loss: 3.3774 - acc: 0.3599



 29/293 [=>............................] - ETA: 44s - loss: 3.3831 - acc: 0.3600



 31/293 [==>...........................] - ETA: 44s - loss: 3.3853 - acc: 0.3602



 35/293 [==>...........................] - ETA: 42s - loss: 3.3834 - acc: 0.3606



 36/293 [==>...........................] - ETA: 42s - loss: 3.3828 - acc: 0.3607



 40/293 [===>..........................] - ETA: 41s - loss: 3.3759 - acc: 0.3610



 42/293 [===>..........................] - ETA: 40s - loss: 3.3825 - acc: 0.3611



 45/293 [===>..........................] - ETA: 40s - loss: 3.3894 - acc: 0.3613



 47/293 [===>..........................] - ETA: 39s - loss: 3.3842 - acc: 0.3614



 49/293 [====>.........................] - ETA: 39s - loss: 3.3897 - acc: 0.3614



 51/293 [====>.........................] - ETA: 39s - loss: 3.3905 - acc: 0.3615



 53/293 [====>.........................] - ETA: 38s - loss: 3.3885 - acc: 0.3615



 54/293 [====>.........................] - ETA: 38s - loss: 3.3927 - acc: 0.3616



 56/293 [====>.........................] - ETA: 37s - loss: 3.3860 - acc: 0.3616



 58/293 [====>.........................] - ETA: 37s - loss: 3.3863 - acc: 0.3617



 63/293 [=====>........................] - ETA: 36s - loss: 3.3933 - acc: 0.3618



 67/293 [=====>........................] - ETA: 35s - loss: 3.3866 - acc: 0.3620























































































































































































































































































Epoch 8/15
  4/293 [..............................] - ETA: 1:08 - loss: 3.3739 - acc: 0.3602



  6/293 [..............................] - ETA: 59s - loss: 3.3590 - acc: 0.3641 



  8/293 [..............................] - ETA: 55s - loss: 3.3941 - acc: 0.3644



 12/293 [>.............................] - ETA: 50s - loss: 3.3695 - acc: 0.3650



 14/293 [>.............................] - ETA: 49s - loss: 3.3588 - acc: 0.3647



 17/293 [>.............................] - ETA: 48s - loss: 3.3241 - acc: 0.3642



 19/293 [>.............................] - ETA: 48s - loss: 3.3057 - acc: 0.3644



 20/293 [=>............................] - ETA: 47s - loss: 3.2992 - acc: 0.3646



 23/293 [=>............................] - ETA: 45s - loss: 3.3076 - acc: 0.3653



 26/293 [=>............................] - ETA: 44s - loss: 3.2901 - acc: 0.3661



 28/293 [=>............................] - ETA: 43s - loss: 3.2986 - acc: 0.3665



 30/293 [==>...........................] - ETA: 42s - loss: 3.2941 - acc: 0.3669



 32/293 [==>...........................] - ETA: 42s - loss: 3.2819 - acc: 0.3672



 34/293 [==>...........................] - ETA: 41s - loss: 3.2814 - acc: 0.3677



 35/293 [==>...........................] - ETA: 41s - loss: 3.2904 - acc: 0.3678



 37/293 [==>...........................] - ETA: 40s - loss: 3.3031 - acc: 0.3681



 41/293 [===>..........................] - ETA: 39s - loss: 3.2926 - acc: 0.3686



 43/293 [===>..........................] - ETA: 39s - loss: 3.2925 - acc: 0.3689



 45/293 [===>..........................] - ETA: 38s - loss: 3.2947 - acc: 0.3691



 47/293 [===>..........................] - ETA: 38s - loss: 3.2941 - acc: 0.3694



 49/293 [====>.........................] - ETA: 37s - loss: 3.2910 - acc: 0.3696



 52/293 [====>.........................] - ETA: 37s - loss: 3.2816 - acc: 0.3699



 54/293 [====>.........................] - ETA: 36s - loss: 3.2874 - acc: 0.3702



 56/293 [====>.........................] - ETA: 36s - loss: 3.2778 - acc: 0.3704



 59/293 [=====>........................] - ETA: 35s - loss: 3.2821 - acc: 0.3707



 61/293 [=====>........................] - ETA: 35s - loss: 3.2847 - acc: 0.3708



 63/293 [=====>........................] - ETA: 35s - loss: 3.2795 - acc: 0.3710



 67/293 [=====>........................] - ETA: 34s - loss: 3.2817 - acc: 0.3713











































































































































































































































































Epoch 9/15




  1/293 [..............................] - ETA: 2:25 - loss: 3.2861 - acc: 0.3549



  5/293 [..............................] - ETA: 45s - loss: 3.3314 - acc: 0.3624 



  8/293 [..............................] - ETA: 45s - loss: 3.3023 - acc: 0.3648



 10/293 [>.............................] - ETA: 45s - loss: 3.2960 - acc: 0.3659



 13/293 [>.............................] - ETA: 43s - loss: 3.2735 - acc: 0.3669



 15/293 [>.............................] - ETA: 42s - loss: 3.2513 - acc: 0.3678



 17/293 [>.............................] - ETA: 42s - loss: 3.2224 - acc: 0.3688



 19/293 [>.............................] - ETA: 41s - loss: 3.2320 - acc: 0.3699



 21/293 [=>............................] - ETA: 41s - loss: 3.2348 - acc: 0.3705



 23/293 [=>............................] - ETA: 40s - loss: 3.2312 - acc: 0.3712



 25/293 [=>............................] - ETA: 40s - loss: 3.2235 - acc: 0.3717



 28/293 [=>............................] - ETA: 39s - loss: 3.2060 - acc: 0.3726



 30/293 [==>...........................] - ETA: 39s - loss: 3.2145 - acc: 0.3731



 35/293 [==>...........................] - ETA: 38s - loss: 3.2013 - acc: 0.3745



 37/293 [==>...........................] - ETA: 37s - loss: 3.2123 - acc: 0.3749



 39/293 [==>...........................] - ETA: 37s - loss: 3.2142 - acc: 0.3753



 41/293 [===>..........................] - ETA: 37s - loss: 3.2105 - acc: 0.3756



 44/293 [===>..........................] - ETA: 36s - loss: 3.1972 - acc: 0.3762



 47/293 [===>..........................] - ETA: 36s - loss: 3.1978 - acc: 0.3768



 48/293 [===>..........................] - ETA: 35s - loss: 3.2032 - acc: 0.3770



 53/293 [====>.........................] - ETA: 35s - loss: 3.2019 - acc: 0.3777



 55/293 [====>.........................] - ETA: 34s - loss: 3.2053 - acc: 0.3780



 56/293 [====>.........................] - ETA: 34s - loss: 3.2037 - acc: 0.3781



 58/293 [====>.........................] - ETA: 34s - loss: 3.1981 - acc: 0.3784



 60/293 [=====>........................] - ETA: 34s - loss: 3.1988 - acc: 0.3787



 62/293 [=====>........................] - ETA: 33s - loss: 3.1956 - acc: 0.3790



 66/293 [=====>........................] - ETA: 33s - loss: 3.2061 - acc: 0.3794







































































































































































































































































































Epoch 10/15
  5/293 [..............................] - ETA: 43s - loss: 3.2024 - acc: 0.3923 



  6/293 [..............................] - ETA: 45s - loss: 3.1724 - acc: 0.3912



  8/293 [..............................] - ETA: 45s - loss: 3.1437 - acc: 0.3902



 10/293 [>.............................] - ETA: 46s - loss: 3.1573 - acc: 0.3893



 12/293 [>.............................] - ETA: 45s - loss: 3.1563 - acc: 0.3881



 15/293 [>.............................] - ETA: 45s - loss: 3.1432 - acc: 0.3874



 17/293 [>.............................] - ETA: 44s - loss: 3.1525 - acc: 0.3873



 18/293 [>.............................] - ETA: 44s - loss: 3.1558 - acc: 0.3874



 19/293 [>.............................] - ETA: 44s - loss: 3.1443 - acc: 0.3875



 22/293 [=>............................] - ETA: 43s - loss: 3.1226 - acc: 0.3876



 23/293 [=>............................] - ETA: 43s - loss: 3.1265 - acc: 0.3877



 26/293 [=>............................] - ETA: 42s - loss: 3.1321 - acc: 0.3879



 28/293 [=>............................] - ETA: 41s - loss: 3.1388 - acc: 0.3880



 30/293 [==>...........................] - ETA: 41s - loss: 3.1241 - acc: 0.3881



 31/293 [==>...........................] - ETA: 40s - loss: 3.1240 - acc: 0.3882



 35/293 [==>...........................] - ETA: 39s - loss: 3.1390 - acc: 0.3883



 37/293 [==>...........................] - ETA: 39s - loss: 3.1431 - acc: 0.3883



 39/293 [==>...........................] - ETA: 39s - loss: 3.1426 - acc: 0.3882



 41/293 [===>..........................] - ETA: 38s - loss: 3.1456 - acc: 0.3882



 44/293 [===>..........................] - ETA: 38s - loss: 3.1427 - acc: 0.3882



 50/293 [====>.........................] - ETA: 36s - loss: 3.1348 - acc: 0.3883



 51/293 [====>.........................] - ETA: 36s - loss: 3.1336 - acc: 0.3883



 54/293 [====>.........................] - ETA: 36s - loss: 3.1325 - acc: 0.3884



 57/293 [====>.........................] - ETA: 35s - loss: 3.1248 - acc: 0.3884



 59/293 [=====>........................] - ETA: 35s - loss: 3.1219 - acc: 0.3885



 61/293 [=====>........................] - ETA: 35s - loss: 3.1202 - acc: 0.3886







































































































































































































































































































Epoch 11/15




  3/293 [..............................] - ETA: 45s - loss: 3.1284 - acc: 0.3806 



  8/293 [..............................] - ETA: 43s - loss: 3.1095 - acc: 0.3801



 10/293 [>.............................] - ETA: 44s - loss: 3.0749 - acc: 0.3817



 13/293 [>.............................] - ETA: 43s - loss: 3.0597 - acc: 0.3834



 14/293 [>.............................] - ETA: 43s - loss: 3.0615 - acc: 0.3838



 17/293 [>.............................] - ETA: 43s - loss: 3.0530 - acc: 0.3849



 19/293 [>.............................] - ETA: 42s - loss: 3.0570 - acc: 0.3855



 21/293 [=>............................] - ETA: 41s - loss: 3.0541 - acc: 0.3862



 25/293 [=>............................] - ETA: 40s - loss: 3.0510 - acc: 0.3875



 27/293 [=>............................] - ETA: 40s - loss: 3.0575 - acc: 0.3880



 29/293 [=>............................] - ETA: 39s - loss: 3.0683 - acc: 0.3883



 32/293 [==>...........................] - ETA: 39s - loss: 3.0670 - acc: 0.3886



 34/293 [==>...........................] - ETA: 38s - loss: 3.0652 - acc: 0.3888



 38/293 [==>...........................] - ETA: 38s - loss: 3.0647 - acc: 0.3892



 41/293 [===>..........................] - ETA: 37s - loss: 3.0604 - acc: 0.3894



 45/293 [===>..........................] - ETA: 37s - loss: 3.0580 - acc: 0.3896



 46/293 [===>..........................] - ETA: 37s - loss: 3.0596 - acc: 0.3897



 48/293 [===>..........................] - ETA: 36s - loss: 3.0558 - acc: 0.3898



 50/293 [====>.........................] - ETA: 36s - loss: 3.0547 - acc: 0.3899



 52/293 [====>.........................] - ETA: 36s - loss: 3.0477 - acc: 0.3900



 55/293 [====>.........................] - ETA: 35s - loss: 3.0495 - acc: 0.3901



 57/293 [====>.........................] - ETA: 35s - loss: 3.0546 - acc: 0.3901



 60/293 [=====>........................] - ETA: 34s - loss: 3.0536 - acc: 0.3902



 63/293 [=====>........................] - ETA: 34s - loss: 3.0519 - acc: 0.3903



 66/293 [=====>........................] - ETA: 34s - loss: 3.0512 - acc: 0.3904



 67/293 [=====>........................] - ETA: 33s - loss: 3.0536 - acc: 0.3904



































































































































































































































































































































Epoch 12/15




  3/293 [..............................] - ETA: 47s - loss: 2.9807 - acc: 0.3909 



  4/293 [..............................] - ETA: 46s - loss: 3.0183 - acc: 0.3922



  7/293 [..............................] - ETA: 47s - loss: 3.0325 - acc: 0.3909



 12/293 [>.............................] - ETA: 45s - loss: 3.0041 - acc: 0.3931



 13/293 [>.............................] - ETA: 45s - loss: 3.0087 - acc: 0.3933



 16/293 [>.............................] - ETA: 44s - loss: 3.0051 - acc: 0.3939



 17/293 [>.............................] - ETA: 44s - loss: 3.0181 - acc: 0.3941



 19/293 [>.............................] - ETA: 43s - loss: 3.0210 - acc: 0.3942



 22/293 [=>............................] - ETA: 42s - loss: 2.9928 - acc: 0.3946



 25/293 [=>............................] - ETA: 41s - loss: 3.0174 - acc: 0.3946



 28/293 [=>............................] - ETA: 40s - loss: 3.0132 - acc: 0.3946



 32/293 [==>...........................] - ETA: 39s - loss: 3.0219 - acc: 0.3947



 34/293 [==>...........................] - ETA: 39s - loss: 3.0225 - acc: 0.3947



 35/293 [==>...........................] - ETA: 39s - loss: 3.0232 - acc: 0.3947



 38/293 [==>...........................] - ETA: 38s - loss: 3.0234 - acc: 0.3947



 40/293 [===>..........................] - ETA: 38s - loss: 3.0169 - acc: 0.3947



 44/293 [===>..........................] - ETA: 37s - loss: 3.0073 - acc: 0.3948



 45/293 [===>..........................] - ETA: 37s - loss: 3.0090 - acc: 0.3948



 47/293 [===>..........................] - ETA: 37s - loss: 3.0057 - acc: 0.3948



 49/293 [====>.........................] - ETA: 37s - loss: 2.9996 - acc: 0.3949



 50/293 [====>.........................] - ETA: 36s - loss: 2.9937 - acc: 0.3950



 53/293 [====>.........................] - ETA: 36s - loss: 2.9903 - acc: 0.3952



 55/293 [====>.........................] - ETA: 35s - loss: 2.9923 - acc: 0.3954



 62/293 [=====>........................] - ETA: 34s - loss: 2.9844 - acc: 0.3960



 65/293 [=====>........................] - ETA: 34s - loss: 2.9836 - acc: 0.3962



 67/293 [=====>........................] - ETA: 33s - loss: 2.9913 - acc: 0.3963



















































































































































































































































































Epoch 13/15
  1/293 [..............................] - ETA: 1:20 - loss: 3.0546 - acc: 0.4000



  3/293 [..............................] - ETA: 47s - loss: 3.0476 - acc: 0.3923 



  5/293 [..............................] - ETA: 46s - loss: 3.0029 - acc: 0.3937



 11/293 [>.............................] - ETA: 47s - loss: 2.9957 - acc: 0.3985



 12/293 [>.............................] - ETA: 47s - loss: 2.9862 - acc: 0.3987



 14/293 [>.............................] - ETA: 46s - loss: 2.9607 - acc: 0.3997



 18/293 [>.............................] - ETA: 45s - loss: 2.9689 - acc: 0.4009



 19/293 [>.............................] - ETA: 45s - loss: 2.9626 - acc: 0.4011



 21/293 [=>............................] - ETA: 44s - loss: 2.9540 - acc: 0.4014



 25/293 [=>............................] - ETA: 43s - loss: 2.9585 - acc: 0.4020



 27/293 [=>............................] - ETA: 42s - loss: 2.9461 - acc: 0.4022



 30/293 [==>...........................] - ETA: 41s - loss: 2.9468 - acc: 0.4025



 32/293 [==>...........................] - ETA: 41s - loss: 2.9414 - acc: 0.4028



 33/293 [==>...........................] - ETA: 40s - loss: 2.9383 - acc: 0.4029



 35/293 [==>...........................] - ETA: 40s - loss: 2.9414 - acc: 0.4031



 37/293 [==>...........................] - ETA: 40s - loss: 2.9347 - acc: 0.4033



 39/293 [==>...........................] - ETA: 39s - loss: 2.9368 - acc: 0.4035



 41/293 [===>..........................] - ETA: 39s - loss: 2.9419 - acc: 0.4037



 43/293 [===>..........................] - ETA: 38s - loss: 2.9447 - acc: 0.4037



 45/293 [===>..........................] - ETA: 38s - loss: 2.9470 - acc: 0.4038



 48/293 [===>..........................] - ETA: 38s - loss: 2.9509 - acc: 0.4039



 50/293 [====>.........................] - ETA: 37s - loss: 2.9442 - acc: 0.4040



 53/293 [====>.........................] - ETA: 37s - loss: 2.9383 - acc: 0.4040



 55/293 [====>.........................] - ETA: 36s - loss: 2.9336 - acc: 0.4041



 57/293 [====>.........................] - ETA: 36s - loss: 2.9370 - acc: 0.4042



 59/293 [=====>........................] - ETA: 36s - loss: 2.9346 - acc: 0.4042



 62/293 [=====>........................] - ETA: 35s - loss: 2.9292 - acc: 0.4043



 64/293 [=====>........................] - ETA: 35s - loss: 2.9296 - acc: 0.4044



 66/293 [=====>........................] - ETA: 34s - loss: 2.9284 - acc: 0.4045















































































































































































































































































Epoch 14/15
  4/293 [..............................] - ETA: 45s - loss: 2.9565 - acc: 0.4073 



  7/293 [..............................] - ETA: 44s - loss: 3.0121 - acc: 0.4075



  9/293 [..............................] - ETA: 44s - loss: 2.9749 - acc: 0.4069



 11/293 [>.............................] - ETA: 45s - loss: 2.9616 - acc: 0.4065



 14/293 [>.............................] - ETA: 45s - loss: 2.9233 - acc: 0.4075



 16/293 [>.............................] - ETA: 44s - loss: 2.9270 - acc: 0.4080



 17/293 [>.............................] - ETA: 44s - loss: 2.9166 - acc: 0.4082



 19/293 [>.............................] - ETA: 43s - loss: 2.9096 - acc: 0.4086



 21/293 [=>............................] - ETA: 42s - loss: 2.8960 - acc: 0.4091



 24/293 [=>............................] - ETA: 41s - loss: 2.8866 - acc: 0.4098



 26/293 [=>............................] - ETA: 41s - loss: 2.8954 - acc: 0.4101



 28/293 [=>............................] - ETA: 40s - loss: 2.8946 - acc: 0.4104



 29/293 [=>............................] - ETA: 40s - loss: 2.9027 - acc: 0.4105



 32/293 [==>...........................] - ETA: 39s - loss: 2.9135 - acc: 0.4105



 37/293 [==>...........................] - ETA: 38s - loss: 2.9140 - acc: 0.4106



 38/293 [==>...........................] - ETA: 38s - loss: 2.9116 - acc: 0.4106



 39/293 [==>...........................] - ETA: 38s - loss: 2.9085 - acc: 0.4106



 41/293 [===>..........................] - ETA: 38s - loss: 2.9052 - acc: 0.4107



 43/293 [===>..........................] - ETA: 38s - loss: 2.9093 - acc: 0.4107



 45/293 [===>..........................] - ETA: 37s - loss: 2.9043 - acc: 0.4107



 47/293 [===>..........................] - ETA: 37s - loss: 2.9024 - acc: 0.4108



 49/293 [====>.........................] - ETA: 37s - loss: 2.8965 - acc: 0.4108



 53/293 [====>.........................] - ETA: 36s - loss: 2.8888 - acc: 0.4109



 55/293 [====>.........................] - ETA: 36s - loss: 2.8927 - acc: 0.4110



 60/293 [=====>........................] - ETA: 35s - loss: 2.8915 - acc: 0.4111



 61/293 [=====>........................] - ETA: 35s - loss: 2.8898 - acc: 0.4111



 63/293 [=====>........................] - ETA: 34s - loss: 2.8928 - acc: 0.4112



 66/293 [=====>........................] - ETA: 34s - loss: 2.8904 - acc: 0.4112



































































































































































































































































































Epoch 15/15




  6/293 [..............................] - ETA: 46s - loss: 2.8667 - acc: 0.4016 



 13/293 [>.............................] - ETA: 43s - loss: 2.8645 - acc: 0.4081



 15/293 [>.............................] - ETA: 43s - loss: 2.8454 - acc: 0.4091



 17/293 [>.............................] - ETA: 42s - loss: 2.8377 - acc: 0.4100



 20/293 [=>............................] - ETA: 41s - loss: 2.8216 - acc: 0.4112



 21/293 [=>............................] - ETA: 41s - loss: 2.8265 - acc: 0.4116



 24/293 [=>............................] - ETA: 40s - loss: 2.8422 - acc: 0.4123



 26/293 [=>............................] - ETA: 40s - loss: 2.8356 - acc: 0.4127



 27/293 [=>............................] - ETA: 40s - loss: 2.8404 - acc: 0.4128



 28/293 [=>............................] - ETA: 40s - loss: 2.8465 - acc: 0.4129



 30/293 [==>...........................] - ETA: 40s - loss: 2.8437 - acc: 0.4131



 32/293 [==>...........................] - ETA: 40s - loss: 2.8466 - acc: 0.4133



 35/293 [==>...........................] - ETA: 39s - loss: 2.8601 - acc: 0.4134



 37/293 [==>...........................] - ETA: 39s - loss: 2.8687 - acc: 0.4133



 39/293 [==>...........................] - ETA: 38s - loss: 2.8541 - acc: 0.4134



 41/293 [===>..........................] - ETA: 38s - loss: 2.8505 - acc: 0.4134



 43/293 [===>..........................] - ETA: 38s - loss: 2.8466 - acc: 0.4136



 48/293 [===>..........................] - ETA: 37s - loss: 2.8390 - acc: 0.4140



 50/293 [====>.........................] - ETA: 36s - loss: 2.8340 - acc: 0.4142



 52/293 [====>.........................] - ETA: 36s - loss: 2.8350 - acc: 0.4144



 55/293 [====>.........................] - ETA: 35s - loss: 2.8317 - acc: 0.4147



 56/293 [====>.........................] - ETA: 35s - loss: 2.8285 - acc: 0.4148



 59/293 [=====>........................] - ETA: 35s - loss: 2.8320 - acc: 0.4150



 61/293 [=====>........................] - ETA: 34s - loss: 2.8309 - acc: 0.4151



 63/293 [=====>........................] - ETA: 34s - loss: 2.8370 - acc: 0.4152



 65/293 [=====>........................] - ETA: 34s - loss: 2.8378 - acc: 0.4152



 67/293 [=====>........................] - ETA: 33s - loss: 2.8381 - acc: 0.4153













































































































































































































































































































Test image path.

In [244]:
img_dt = "/data/test/rxxch9vw59-2/"
imgs = os.listdir(img_dt+"images")

Generates a caption for a random image among test images.

In [245]:
random_image = np.random.choice(imgs)

In [246]:
# generate_caption(img_dt,random_image)
generate_caption(img_dt,'1228.png')

Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha

Generate captions for all test images.

In [247]:
for i in imgs:
  generate_caption(img_dt, i)

Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)



Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Posit



Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Posit



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Posit



Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Posit



Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Posit



Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Posit



Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Posit



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positio



Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha



Encoder Input Shape: (1, 1, 256)
Encoder Input Shape before LayerNorm: (1, 1, 256)
Encoder Input Shape after LayerNorm: (1, 1, 256)
Encoder Output Shape: (1, 1, 256)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Shape: (1, 11)
Positional Embedding Output Shape: (1, 11, 256)
Decoder Output Shape: (1, 11, 3138)
Decoder Input Shape: (1, 11)
Positional Embedding Input Sha

Save the weights of the trained model.

In [248]:
# After training the model, save the weights
caption_model.save_weights(f'{tmpx}imgcap_{mdx}', save_format='tf')

Dump the vectorised vocabulary.

In [249]:
# Define the directory path
directory = f'/results/Vocab/{mdx}'

# Create the directory if it doesn't exist
os.makedirs(directory, exist_ok=True)

# Save the vocabulary using pickle
with open(f'{directory}/vocab_{mdx}', 'wb') as f:
    pickle.dump(vocab, f)