In [1]:
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader
import pickle as pkl

# load tokenized dataset

In [2]:
dataset_dir_path = '../../data/processed/tokenized_data/'
with open(dataset_dir_path + 'train_data.pkl', 'rb') as f:
    tokenized_train_data = pkl.load(f)

with open(dataset_dir_path + 'valid_data.pkl', 'rb') as f:
    tokenized_valid_data = pkl.load(f)

In [3]:
print(f'Input(de) {tokenized_train_data[0][0]}')
print(f'Output(en) {tokenized_train_data[0][1]}')

Input(de) tensor([   2,   21,   85,  256,   31,   86,   22,   93,    7,   16,  114, 5645,
        3245,    3])
Output(en) tensor([   2,   19,   25,   15, 1197,  817,   17,   58,   84,  332, 1319,    3])


# load vocab

In [4]:
vocab_dir_path = '../../data/processed/vocab/'

with open(vocab_dir_path + 'token2idx_de.pkl', 'rb') as f:
    token2idx_de= pkl.load(f)
with open(vocab_dir_path + 'token2idx_en.pkl', 'rb') as f:
    token2idx_en = pkl.load(f)
with open(vocab_dir_path + 'idx2token_de.pkl', 'rb') as f:
    idx2token_de = pkl.load(f)
with open(vocab_dir_path + 'idx2token_en.pkl', 'rb') as f:
    idx2token_en = pkl.load(f)

# making the batch

In [5]:
batch_size = 128
PAD_INDEX = token2idx_de['<pad>']
START_INDEX = token2idx_en['<start>']
END_INDEX = token2idx_en['<end>']

In [6]:
def generate_batch(data_batch):
    batch_src = []
    batch_tgt = []
    for src, tgt in data_batch:
        batch_src.append(src)
        batch_tgt.append(tgt)
    
    batch_src = pad_sequence(batch_src, padding_value=PAD_INDEX)
    batch_tgt = pad_sequence(batch_tgt, padding_value=PAD_INDEX)

    return batch_src, batch_tgt

In [7]:
train_iter = DataLoader(tokenized_train_data, batch_size=batch_size, shuffle=True, collate_fn=generate_batch)
valid_iter = DataLoader(tokenized_valid_data, batch_size=batch_size, shuffle=True, collate_fn=generate_batch)

In [8]:
# show train_iter
# each column is a text
list(train_iter)[0]

(tensor([[   2,    2,    2,  ...,    2,    2,    2],
         [   5,   21,  265,  ...,    5,   21,   21],
         [1658,   85, 3228,  ...,   27,   43,  115],
         ...,
         [   1,    1,    1,  ...,    1,    1,    1],
         [   1,    1,    1,  ...,    1,    1,    1],
         [   1,    1,    1,  ...,    1,    1,    1]]),
 tensor([[  2,   2,   2,  ...,   2,   2,   2],
         [  6,  19, 220,  ...,   6,  19,  19],
         [533,  25,  10,  ...,  33,  22,  25],
         ...,
         [  1,   1,   1,  ...,   1,   1,   1],
         [  1,   1,   1,  ...,   1,   1,   1],
         [  1,   1,   1,  ...,   1,   1,   1]]))

In [9]:
for src, tgt in train_iter:
    print(src)
    print(tgt)
    print(f'src shape : {src.shape}')
    print(f'tgt.shape : {tgt.shape}')
    break

tensor([[   2,    2,    2,  ...,    2,    2,    2],
        [  21,   87,  126,  ...,    5,  722,    5],
        [  31,  256, 3179,  ...,  949,    7, 2244],
        ...,
        [   1,    1,    1,  ...,    1,    1,    1],
        [   1,    1,    1,  ...,    1,    1,    1],
        [   1,    1,    1,  ...,    1,    1,    1]])
tensor([[   2,    2,    2,  ...,    2,    2,    2],
        [  19,   49,   49,  ...,   53, 2675,    6],
        [  36,   24,  403,  ...,  141,  226, 1710],
        ...,
        [   1,    1,    1,  ...,    1,    4,    1],
        [   1,    1,    1,  ...,    1,  767,    1],
        [   1,    1,    1,  ...,    1,    3,    1]])
src shape : torch.Size([24, 128])
tgt.shape : torch.Size([27, 128])


# Model

In [10]:
import math
import torch
import torch.nn as nn
from torch import Tensor

## token embedding

In [11]:
class TokenEmbedding(nn.Module):
    def __init__(self, vocab_size, embedding_size):
        super().__init__()
        # nn.Embedding is a simple lookup table.
        # if token index is set, it will return the corresponding embedding vector.
        self.embedding = nn.Embedding(vocab_size, embedding_size)
        self.embedding_size = embedding_size
    
    def forward(self, tokens: Tensor):
        # the reason for this multiply is to align the range of the values
        # It is to make the positional encoding relatively smaller. 
        # This means the original meaning in the embedding vector won’t be lost 
        # when we add them together.
        return self.embedding(tokens.long()) * math.sqrt(self.embedding_size)

## positional encoging

In [None]:
class PositionalEncoding(nn.Module):
    
    def __init__(self, embedding_size: int, dropout: float, maxlen: int = 5000):
        super().__init__()
        
        den = torch.exp(-torch.arange(0, embedding_size, 2) * math.log(10000) / embedding_size)
        pos = torch.arange(0, maxlen).reshape(maxlen, 1)
        embedding_pos = torch.zeros((maxlen, embedding_size))
        embedding_pos[:, 0::2] = torch.sin(pos * den) # extract even element (start:stop:step)
        embedding_pos[:, 1::2] = torch.cos(pos * den) # extract odd element (start:stop:step)
        embedding_pos = embedding_pos.unsqueeze(-2)

        self.dropout = nn.Dropout(dropout)
        self.register_buffer('embedding_pos', embedding_pos) # positional encoding is not updated by learning

    def forward(self, token_embedding: Tensor):
        #print(f'token_embedding : {token_embedding.shape}')
        #print(f'positional_encoding : {self.embedding_pos.shape}') # self.embedding_pos is defined by self.register_fuffer.
        # self.embedding_pos is not updated by learning
        #print(f'positional_encoding : {self.embedding_pos[:token_embedding.size(0), :].shape}')
        return self.dropout(token_embedding + self.embedding_pos[:token_embedding.size(0), :])

## masking

In [13]:
def generate_square_subsequent_mask(seq_len, PAD_INDEX):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # seq_len is the length of the sentence
    # The mask is used to prevent the model from looking ahead in the sequence.
    # The mask is a square matrix of size (seq_len, seq_len)
    # The upper triangle of the matrix is filled with -inf
    # The lower triangle of the matrix is filled with 0
    mask = (torch.triu(torch.ones((seq_len, seq_len), device=device)) == 1).transpose(0, 1) # upper triangle
    mask = mask.float().masked_fill(mask == 1, float('-inf')).masked_fill(mask == PAD_INDEX, float(0.0)) # 1 -> -inf, 0 -> 0
    return mask

def create_mask(src, tgt, PAD_INDEX):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    seq_len_src = src.shape[0] # word number in one sentence in source
    seq_len_tgt = tgt.shape[0] # word number in one sentence in target

    mask_tgt = generate_square_subsequent_mask(seq_len_tgt, PAD_INDEX=PAD_INDEX)
    mask_src = torch.zeros((seq_len_src, seq_len_src), device=device).type(torch.bool)

    padding_mask_src = (src == PAD_INDEX).transpose(0, 1)
    padding_mask_tgt = (tgt == PAD_INDEX).transpose(0, 1)

    return mask_src, mask_tgt, padding_mask_src, padding_mask_tgt

In [14]:
seq_len_src = src.shape[0]
mask_src = torch.zeros((seq_len_src, seq_len_src)).type(torch.bool)

In [15]:
mask_src

tensor([[False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False],
        [False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False],
        [False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False],
        [False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False],
        [False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False],
        [False, Fals

In [16]:
from torch.nn import TransformerEncoder, TransformerDecoder, TransformerEncoderLayer, TransformerDecoderLayer

In [17]:
class Seq2SeqTransformer(nn.Module):
    def __init__(self, num_encoder_layers: int, num_decoder_layers: int, embedding_size: int, vocab_size_src: int, vocab_size_tgt: int, dim_feedforward: int = 512, dropout: float = 0.1, nhead:int = 8):
        super().__init__()

        self.token_embedding_src = TokenEmbedding(vocab_size_src, embedding_size)
        self.positional_encoding = PositionalEncoding(embedding_size, dropout=dropout)
        encoder_layer = TransformerEncoderLayer(d_model=embedding_size, nhead=nhead, dim_feedforward=dim_feedforward, dropout=dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layer=encoder_layer, num_layers=num_encoder_layers)

        self.token_embedding_tgt = TokenEmbedding(vocab_size_tgt, embedding_size)
        decoder_layer = TransformerDecoderLayer(d_model=embedding_size, nhead=nhead, dim_feedforward=dim_feedforward, dropout=dropout)
        self.transformer_decoder = TransformerDecoder(decoder_layer=decoder_layer, num_layers=num_decoder_layers)
        self.output = nn.Linear(embedding_size, vocab_size_tgt)
    
    def forward(self, src: Tensor, tgt: Tensor, mask_src: Tensor, mask_tgt: Tensor, padding_mask_src: Tensor, padding_mask_tgt: Tensor, memory_key_padding_mask: Tensor):
        # src : (seq_len_src, batch_size)
        # tgt : (seq_len_tgt, batch_size)
        # mask_src : (seq_len_src, seq_len_src)
        # mask_tgt : (seq_len_tgt, seq_len_tgt)
        # padding_mask_src : (batch_size, seq_len_src)
        # padding_mask_tgt : (batch_size, seq_len_tgt)

        embedding_src = self.positional_encoding(self.token_embedding_src(src))
        memory = self.transformer_encoder(embedding_src, mask_src, padding_mask_src)
        embedding_tgt = self.positional_encoding(self.token_embedding_tgt(tgt))
        outs = self.transformer_decoder(embedding_tgt, memory, mask_tgt, None, padding_mask_tgt, memory_key_padding_mask)
        return self.output(outs)
    
    def encode(self, src: Tensor, mask_src: Tensor):
        return self.transformer_encoder(self.positional_encoding(self.token_embedding_src(src)), mask_src)
    
    def decode(self, tgt: Tensor, memory: Tensor, mask_tgt: Tensor):
        return self.transformer_decoder(self.positional_encoding(self.token_embedding_tgt(tgt), memory, mask_tgt))


In [18]:
from tqdm import tqdm

In [19]:
def train(model, data, optimizer, criterion, PAD_INDEX):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model.train()
    losses = 0.0
    for src, tgt in tqdm(data):
        src = src.to(device)
        tgt = tgt.to(device)
        input_tgt = tgt[:-1, :] # remove last token
        mask_src, mask_tgt, padding_mask_src, padding_mask_tgt = create_mask(src, input_tgt, PAD_INDEX)

        logits = model(src=src, tgt=input_tgt,
                        mask_src=mask_src, mask_tgt=mask_tgt,
                        padding_mask_src=padding_mask_src, padding_mask_tgt=padding_mask_tgt,
                        memory_key_padding_mask=padding_mask_src)

        optimizer.zero_grad()

        output_tgt = tgt[1:, :] # remove first token
        loss = criterion(logits.reshape(-1, logits.shape[-1]), output_tgt.reshape(-1))
        loss.backward()

        optimizer.step()
        losses += loss.item()
    
    return losses / len(data)

In [20]:
def evaluate(model, data, criterion, PAD_INDEX):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model.eval()
    losses = 0.0

    for src, tgt in data:
        src = src.to(device)
        tgt = tgt.to(device)

        input_tgt = tgt[:-1, :] # remove last token
        mask_src, mask_tgt, padding_mask_src, padding_mask_tgt = create_mask(src, input_tgt, PAD_INDEX)
        logits = model(src=src, tgt=input_tgt, 
                       mask_src=mask_src, mask_tgt=mask_tgt,
                       padding_mask_src=padding_mask_src, padding_mask_tgt=padding_mask_tgt,
                       memory_key_padding_mask=padding_mask_src)

        output_tgt = tgt[1:, :] # remove first token
        loss = criterion(logits.reshape(-1, logits.shape[-1]), output_tgt.reshape(-1))
        losses += loss.item()

    return losses / len(data)

In [21]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

vocab_size_src = len(token2idx_de)
vocab_size_tgt = len(token2idx_en)
embedding_size = 240 # smaller than original 512
nhead = 8
dim_feedforward = 100
num_encoder_layers = 2
num_decoder_layers = 2
dropout = 0.1

model = Seq2SeqTransformer(
    num_encoder_layers=num_encoder_layers,
    num_decoder_layers=num_decoder_layers,
    embedding_size=embedding_size,
    vocab_size_src=vocab_size_src,
    vocab_size_tgt=vocab_size_tgt,
    dim_feedforward=dim_feedforward,
    dropout=dropout,
    nhead=nhead
)

for p in model.parameters():
    if p.dim() > 1:
        nn.init.xavier_uniform_(p) # Xavier initialization

model = model.to(device)

criterion = torch.nn.CrossEntropyLoss(ignore_index=PAD_INDEX)
optimizer = torch.optim.Adam(model.parameters())



In [22]:
import time

epoch = 100
best_loss = float('inf')
best_model = None
patience = 10
counter = 0

for loop in range(epoch):
    start_time = time.time()

    loss_train = train(
        model=model, data=train_iter, optimizer=optimizer,
        criterion=criterion, PAD_INDEX=PAD_INDEX
    )

    elapsed_time = time.time() - start_time
    loss_valid = evaluate(
        model=model, data=valid_iter, criterion=criterion, PAD_INDEX=PAD_INDEX
    )

    print(f'epoch: {loop+1}, train loss: {loss_train:.4f}, valid loss: {loss_valid:.4f}, elapsed time: {elapsed_time:.4f} sec')

    if best_loss > loss_valid:
        best_loss = loss_valid
        best_model = model
        counter = 0
    
    if counter > patience:
        break

    counter += 1



token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])


  2%|▏         | 5/227 [00:00<00:15, 14.62it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

  6%|▌         | 13/227 [00:00<00:07, 27.03it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embeddi

 10%|█         | 23/227 [00:00<00:05, 35.78it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 15%|█▍        | 33/227 [00:01<00:04, 39.96it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 19%|█▉        | 43/227 [00:01<00:04, 40.24it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 21%|██        | 48/227 [00:01<00:04, 41.02it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 26%|██▌       | 58/227 [00:01<00:04, 41.08it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([39, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([39, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 30%|██▉       | 68/227 [00:01<00:03, 40.89it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embeddi

 34%|███▍      | 78/227 [00:02<00:03, 41.27it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 37%|███▋      | 83/227 [00:02<00:03, 41.14it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embeddi

 41%|████      | 93/227 [00:02<00:03, 41.35it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 45%|████▌     | 103/227 [00:02<00:03, 40.51it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 50%|████▉     | 113/227 [00:03<00:02, 41.46it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 54%|█████▍    | 123/227 [00:03<00:02, 42.44it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 56%|█████▋    | 128/227 [00:03<00:02, 42.60it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 61%|██████    | 138/227 [00:03<00:02, 42.53it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 65%|██████▌   | 148/227 [00:03<00:01, 42.44it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 70%|██████▉   | 158/227 [00:04<00:01, 42.20it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embeddi

 74%|███████▍  | 168/227 [00:04<00:01, 42.36it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 76%|███████▌  | 173/227 [00:04<00:01, 41.19it/s]

token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embeddi

 81%|████████  | 183/227 [00:04<00:01, 42.66it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 85%|████████▌ | 193/227 [00:04<00:00, 41.42it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embedding : torch.Size([39, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([39, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 89%|████████▉ | 203/227 [00:05<00:00, 41.95it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 92%|█████████▏| 208/227 [00:05<00:00, 41.09it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embeddi

 96%|█████████▌| 218/227 [00:05<00:00, 41.80it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embeddi

100%|██████████| 227/227 [00:05<00:00, 39.68it/s]


token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

  0%|          | 0/227 [00:00<?, ?it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])


  2%|▏         | 4/227 [00:00<00:06, 36.04it/s]

token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

  4%|▎         | 8/227 [00:00<00:05, 37.42it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


  6%|▌         | 13/227 [00:00<00:05, 40.06it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

  8%|▊         | 18/227 [00:00<00:05, 40.26it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 10%|█         | 23/227 [00:00<00:05, 38.31it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embeddi

 15%|█▍        | 33/227 [00:00<00:04, 40.24it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 17%|█▋        | 38/227 [00:00<00:04, 41.11it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 19%|█▉        | 43/227 [00:01<00:04, 41.70it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([41, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([41, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 21%|██        | 48/227 [00:01<00:04, 41.55it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 23%|██▎       | 53/227 [00:01<00:04, 41.54it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 26%|██▌       | 58/227 [00:01<00:04, 41.89it/s]

token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 30%|██▉       | 68/227 [00:01<00:03, 42.31it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 34%|███▍      | 78/227 [00:01<00:03, 41.91it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 37%|███▋      | 83/227 [00:02<00:03, 41.82it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 39%|███▉      | 88/227 [00:02<00:03, 42.11it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])


 41%|████      | 93/227 [00:02<00:03, 42.27it/s]

token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 43%|████▎     | 98/227 [00:02<00:03, 42.93it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])


 45%|████▌     | 103/227 [00:02<00:02, 42.05it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([39, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([39, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 48%|████▊     | 108/227 [00:02<00:02, 40.75it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 50%|████▉     | 113/227 [00:02<00:02, 40.04it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])


 52%|█████▏    | 118/227 [00:02<00:02, 40.95it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 54%|█████▍    | 123/227 [00:02<00:02, 41.51it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 56%|█████▋    | 128/227 [00:03<00:02, 41.22it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embeddi

 61%|██████    | 138/227 [00:03<00:02, 41.71it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embeddi

 65%|██████▌   | 148/227 [00:03<00:01, 43.11it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 67%|██████▋   | 153/227 [00:03<00:01, 42.60it/s]

token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 70%|██████▉   | 158/227 [00:03<00:01, 41.59it/s]

token_embedding : torch.Size([31, 128, 240])

 72%|███████▏  | 163/227 [00:03<00:01, 41.89it/s]


positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_e

 76%|███████▌  | 173/227 [00:04<00:01, 41.23it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 81%|████████  | 183/227 [00:04<00:01, 41.04it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 85%|████████▌ | 193/227 [00:04<00:00, 41.24it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 87%|████████▋ | 198/227 [00:04<00:00, 40.80it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embeddi

 92%|█████████▏| 208/227 [00:05<00:00, 40.51it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 96%|█████████▌| 218/227 [00:05<00:00, 40.65it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

100%|██████████| 227/227 [00:05<00:00, 41.28it/s]

token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi




token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([34, 118, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([24, 118, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
epoch: 2, train loss: 0.5648, valid loss: 0.2993, elapsed time: 5.5017 sec


  2%|▏         | 4/227 [00:00<00:05, 37.33it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embeddi

  6%|▌         | 13/227 [00:00<00:05, 38.84it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 10%|█         | 23/227 [00:00<00:05, 40.25it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 12%|█▏        | 28/227 [00:00<00:05, 39.43it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embeddi

 14%|█▍        | 32/227 [00:00<00:04, 39.41it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 16%|█▋        | 37/227 [00:00<00:04, 40.54it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 19%|█▊        | 42/227 [00:01<00:04, 41.14it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])


 21%|██        | 47/227 [00:01<00:04, 41.56it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 25%|██▍       | 56/227 [00:01<00:04, 39.65it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 29%|██▊       | 65/227 [00:01<00:04, 40.35it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 33%|███▎      | 75/227 [00:01<00:03, 40.11it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 35%|███▌      | 80/227 [00:01<00:03, 41.02it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 37%|███▋      | 85/227 [00:02<00:03, 41.27it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])


 40%|███▉      | 90/227 [00:02<00:03, 41.19it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 44%|████▍     | 100/227 [00:02<00:03, 41.81it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 48%|████▊     | 110/227 [00:02<00:02, 41.58it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 51%|█████     | 115/227 [00:02<00:02, 41.42it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embeddi

 53%|█████▎    | 120/227 [00:02<00:02, 42.13it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])


 55%|█████▌    | 125/227 [00:03<00:02, 42.18it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 57%|█████▋    | 130/227 [00:03<00:02, 41.70it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 59%|█████▉    | 135/227 [00:03<00:02, 42.18it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 64%|██████▍   | 145/227 [00:03<00:01, 42.77it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 68%|██████▊   | 155/227 [00:03<00:01, 41.74it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embeddi

 70%|███████   | 160/227 [00:03<00:01, 40.37it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 73%|███████▎  | 165/227 [00:04<00:01, 40.47it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])


 75%|███████▍  | 170/227 [00:04<00:01, 40.61it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 79%|███████▉  | 180/227 [00:04<00:01, 40.24it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 83%|████████▎ | 189/227 [00:04<00:00, 39.69it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 88%|████████▊ | 199/227 [00:04<00:00, 41.41it/s]

token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embeddi

 90%|████████▉ | 204/227 [00:04<00:00, 41.85it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 92%|█████████▏| 209/227 [00:05<00:00, 41.37it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])


 94%|█████████▍| 214/227 [00:05<00:00, 40.74it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 99%|█████████▊| 224/227 [00:05<00:00, 40.84it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

100%|██████████| 227/227 [00:05<00:00, 40.90it/s]


token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

  0%|          | 0/227 [00:00<?, ?it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

  2%|▏         | 5/227 [00:00<00:05, 40.90it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

  4%|▍         | 10/227 [00:00<00:05, 39.99it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([45, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([45, 1, 240])
token_embeddi

  7%|▋         | 15/227 [00:00<00:05, 39.32it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])


  8%|▊         | 19/227 [00:00<00:05, 38.53it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 11%|█         | 24/227 [00:00<00:05, 38.52it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])


 12%|█▏        | 28/227 [00:00<00:05, 38.61it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embeddi

 14%|█▍        | 32/227 [00:00<00:05, 38.89it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])


 16%|█▌        | 36/227 [00:00<00:04, 39.00it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 18%|█▊        | 41/227 [00:01<00:04, 39.67it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 20%|█▉        | 45/227 [00:01<00:04, 39.62it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 24%|██▍       | 55/227 [00:01<00:04, 40.14it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 26%|██▋       | 60/227 [00:01<00:04, 40.31it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 29%|██▊       | 65/227 [00:01<00:03, 41.20it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([39, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([39, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])


 31%|███       | 70/227 [00:01<00:03, 40.51it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 33%|███▎      | 75/227 [00:01<00:03, 40.66it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])


 35%|███▌      | 80/227 [00:01<00:03, 41.17it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 40%|███▉      | 90/227 [00:02<00:03, 41.47it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 42%|████▏     | 95/227 [00:02<00:03, 41.58it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 44%|████▍     | 100/227 [00:02<00:03, 41.66it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 46%|████▋     | 105/227 [00:02<00:02, 41.91it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 48%|████▊     | 110/227 [00:02<00:02, 41.58it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 51%|█████     | 115/227 [00:02<00:02, 41.85it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 53%|█████▎    | 120/227 [00:02<00:02, 42.54it/s]

token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])


 55%|█████▌    | 125/227 [00:03<00:02, 42.41it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 59%|█████▉    | 135/227 [00:03<00:02, 41.67it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 62%|██████▏   | 140/227 [00:03<00:02, 40.53it/s]

token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 64%|██████▍   | 145/227 [00:03<00:02, 40.03it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])


 66%|██████▌   | 150/227 [00:03<00:01, 40.34it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 68%|██████▊   | 155/227 [00:03<00:01, 40.21it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 70%|███████   | 160/227 [00:03<00:01, 40.61it/s]

token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embeddi

 75%|███████▍  | 170/227 [00:04<00:01, 40.98it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([41, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([41, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embeddi

 77%|███████▋  | 175/227 [00:04<00:01, 40.34it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 79%|███████▉  | 180/227 [00:04<00:01, 40.28it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])


 81%|████████▏ | 185/227 [00:04<00:01, 40.50it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 86%|████████▌ | 195/227 [00:04<00:00, 40.73it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 90%|█████████ | 205/227 [00:05<00:00, 41.14it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 95%|█████████▍| 215/227 [00:05<00:00, 40.77it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 97%|█████████▋| 220/227 [00:05<00:00, 39.81it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

100%|██████████| 227/227 [00:05<00:00, 40.65it/s]


token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 72, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 72, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding

  0%|          | 0/227 [00:00<?, ?it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embeddi

  2%|▏         | 5/227 [00:00<00:05, 42.99it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

  4%|▍         | 10/227 [00:00<00:05, 41.30it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

  7%|▋         | 15/227 [00:00<00:05, 41.71it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

  9%|▉         | 20/227 [00:00<00:05, 41.15it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])


 11%|█         | 25/227 [00:00<00:04, 41.10it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 13%|█▎        | 30/227 [00:00<00:04, 40.40it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 15%|█▌        | 35/227 [00:00<00:04, 40.00it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 18%|█▊        | 40/227 [00:00<00:04, 40.10it/s]

token_embedding : torch.Size([45, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([45, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])


 20%|█▉        | 45/227 [00:01<00:04, 38.86it/s]

token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embeddi

 22%|██▏       | 49/227 [00:01<00:04, 38.78it/s]

token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])


 24%|██▍       | 54/227 [00:01<00:04, 39.73it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 26%|██▌       | 58/227 [00:01<00:04, 39.33it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embeddi

 28%|██▊       | 63/227 [00:01<00:04, 39.00it/s]

token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embeddi

 30%|██▉       | 68/227 [00:01<00:04, 39.62it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])


 32%|███▏      | 72/227 [00:01<00:03, 39.46it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 34%|███▍      | 77/227 [00:01<00:03, 40.03it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embeddi

 36%|███▌      | 82/227 [00:02<00:03, 40.63it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 38%|███▊      | 87/227 [00:02<00:03, 40.95it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 41%|████      | 92/227 [00:02<00:03, 41.47it/s]

token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 43%|████▎     | 97/227 [00:02<00:03, 41.72it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])


 45%|████▍     | 102/227 [00:02<00:03, 41.14it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 47%|████▋     | 107/227 [00:02<00:02, 41.05it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 49%|████▉     | 112/227 [00:02<00:02, 41.65it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])


 52%|█████▏    | 117/227 [00:02<00:02, 41.05it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([39, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([39, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 54%|█████▎    | 122/227 [00:03<00:02, 39.94it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 56%|█████▌    | 127/227 [00:03<00:02, 39.44it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 58%|█████▊    | 131/227 [00:03<00:02, 39.19it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embeddi

 59%|█████▉    | 135/227 [00:03<00:02, 39.08it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])


 62%|██████▏   | 140/227 [00:03<00:02, 39.72it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 64%|██████▍   | 145/227 [00:03<00:02, 40.35it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 66%|██████▌   | 150/227 [00:03<00:01, 40.75it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 68%|██████▊   | 155/227 [00:03<00:01, 40.18it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 70%|███████   | 160/227 [00:03<00:01, 39.77it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 73%|███████▎  | 165/227 [00:04<00:01, 39.89it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 74%|███████▍  | 169/227 [00:04<00:01, 39.81it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])


 76%|███████▌  | 173/227 [00:04<00:01, 39.56it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 78%|███████▊  | 177/227 [00:04<00:01, 38.08it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])


 80%|███████▉  | 181/227 [00:04<00:01, 38.39it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embeddi

 81%|████████▏ | 185/227 [00:04<00:01, 38.40it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 84%|████████▎ | 190/227 [00:04<00:00, 39.80it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 86%|████████▌ | 195/227 [00:04<00:00, 40.28it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 88%|████████▊ | 200/227 [00:04<00:00, 40.60it/s]

token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 93%|█████████▎| 210/227 [00:05<00:00, 40.25it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embeddi

 95%|█████████▍| 215/227 [00:05<00:00, 40.04it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 97%|█████████▋| 220/227 [00:05<00:00, 40.24it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 99%|█████████▉| 225/227 [00:05<00:00, 39.85it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embedding : torch.Size([39, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([39, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 72, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])


100%|██████████| 227/227 [00:05<00:00, 40.11it/s]


token_embedding : torch.Size([24, 72, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddin

  0%|          | 0/227 [00:00<?, ?it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])


  2%|▏         | 4/227 [00:00<00:05, 38.12it/s]

token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

  4%|▎         | 8/227 [00:00<00:05, 37.92it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])


  5%|▌         | 12/227 [00:00<00:05, 38.31it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

  7%|▋         | 16/227 [00:00<00:05, 38.53it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])


  9%|▉         | 20/227 [00:00<00:05, 38.21it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 11%|█         | 25/227 [00:00<00:05, 39.06it/s]

token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 13%|█▎        | 29/227 [00:00<00:05, 38.75it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 15%|█▍        | 34/227 [00:00<00:04, 39.92it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 17%|█▋        | 39/227 [00:00<00:04, 40.56it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])


 19%|█▉        | 44/227 [00:01<00:04, 40.23it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 22%|██▏       | 49/227 [00:01<00:04, 40.68it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 24%|██▍       | 54/227 [00:01<00:04, 40.65it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([45, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([45, 1, 240])
token_embeddi

 26%|██▌       | 59/227 [00:01<00:04, 39.94it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 28%|██▊       | 63/227 [00:01<00:04, 39.46it/s]

token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])


 30%|██▉       | 68/227 [00:01<00:04, 39.22it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 32%|███▏      | 73/227 [00:01<00:03, 39.22it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 34%|███▍      | 77/227 [00:01<00:03, 39.34it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embeddi

 36%|███▌      | 82/227 [00:02<00:03, 40.09it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 38%|███▊      | 87/227 [00:02<00:03, 39.55it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 40%|████      | 91/227 [00:02<00:03, 38.76it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([41, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([41, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 42%|████▏     | 95/227 [00:02<00:03, 38.10it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 44%|████▍     | 100/227 [00:02<00:03, 38.88it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 46%|████▋     | 105/227 [00:02<00:03, 38.87it/s]

token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embedding : torch.Size([39, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([39, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 48%|████▊     | 109/227 [00:02<00:03, 38.66it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embeddi

 50%|████▉     | 113/227 [00:02<00:02, 38.51it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])


 52%|█████▏    | 117/227 [00:02<00:02, 38.74it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 54%|█████▎    | 122/227 [00:03<00:02, 39.82it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 56%|█████▌    | 127/227 [00:03<00:02, 40.89it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 58%|█████▊    | 132/227 [00:03<00:02, 40.79it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 60%|██████    | 137/227 [00:03<00:02, 40.22it/s]

token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 63%|██████▎   | 142/227 [00:03<00:02, 40.41it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 65%|██████▍   | 147/227 [00:03<00:01, 40.38it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 67%|██████▋   | 152/227 [00:03<00:01, 40.81it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 69%|██████▉   | 157/227 [00:03<00:01, 41.44it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embeddi

 71%|███████▏  | 162/227 [00:04<00:01, 40.87it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 74%|███████▎  | 167/227 [00:04<00:01, 41.38it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 76%|███████▌  | 172/227 [00:04<00:01, 42.05it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embeddi

 78%|███████▊  | 177/227 [00:04<00:01, 41.44it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 80%|████████  | 182/227 [00:04<00:01, 41.45it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embeddi

 82%|████████▏ | 187/227 [00:04<00:00, 41.43it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embeddi

 85%|████████▍ | 192/227 [00:04<00:00, 41.81it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 87%|████████▋ | 197/227 [00:04<00:00, 41.65it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 89%|████████▉ | 202/227 [00:05<00:00, 41.61it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 91%|█████████ | 207/227 [00:05<00:00, 41.84it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 93%|█████████▎| 212/227 [00:05<00:00, 41.40it/s]

token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])


 96%|█████████▌| 217/227 [00:05<00:00, 41.17it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 98%|█████████▊| 222/227 [00:05<00:00, 41.62it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

100%|██████████| 227/227 [00:05<00:00, 40.30it/s]

token_embedding : torch.Size([27, 72, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([34, 72, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding




token_embedding : torch.Size([32, 118, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([31, 118, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
epoch: 6, train loss: 0.0320, valid loss: 0.1852, elapsed time: 5.6360 sec


  0%|          | 0/227 [00:00<?, ?it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])


  2%|▏         | 4/227 [00:00<00:05, 39.34it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embeddi

  4%|▎         | 8/227 [00:00<00:05, 38.46it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

  6%|▌         | 13/227 [00:00<00:05, 39.75it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

  8%|▊         | 18/227 [00:00<00:05, 40.45it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 10%|█         | 23/227 [00:00<00:04, 41.46it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 12%|█▏        | 28/227 [00:00<00:04, 41.01it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])


 15%|█▍        | 33/227 [00:00<00:04, 41.05it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 17%|█▋        | 38/227 [00:00<00:04, 40.39it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 19%|█▉        | 43/227 [00:01<00:04, 40.77it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 21%|██        | 48/227 [00:01<00:04, 40.84it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 23%|██▎       | 53/227 [00:01<00:04, 40.98it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 26%|██▌       | 58/227 [00:01<00:04, 40.76it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 28%|██▊       | 63/227 [00:01<00:04, 39.96it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])


 30%|██▉       | 68/227 [00:01<00:03, 40.81it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 32%|███▏      | 73/227 [00:01<00:03, 41.27it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])


 34%|███▍      | 78/227 [00:01<00:03, 40.73it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 37%|███▋      | 83/227 [00:02<00:03, 40.51it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 39%|███▉      | 88/227 [00:02<00:03, 41.18it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 41%|████      | 93/227 [00:02<00:03, 41.17it/s]

token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embeddi

 43%|████▎     | 98/227 [00:02<00:03, 40.62it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 45%|████▌     | 103/227 [00:02<00:02, 41.34it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 48%|████▊     | 108/227 [00:02<00:02, 40.98it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 50%|████▉     | 113/227 [00:02<00:02, 41.84it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])


 52%|█████▏    | 118/227 [00:02<00:02, 42.06it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 54%|█████▍    | 123/227 [00:03<00:02, 41.84it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embeddi

 56%|█████▋    | 128/227 [00:03<00:02, 40.87it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 59%|█████▊    | 133/227 [00:03<00:02, 40.86it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 61%|██████    | 138/227 [00:03<00:02, 41.44it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 63%|██████▎   | 143/227 [00:03<00:02, 41.09it/s]

token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embedding : torch.Size([39, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([39, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embeddi

 65%|██████▌   | 148/227 [00:03<00:01, 40.87it/s]

token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embeddi

 67%|██████▋   | 153/227 [00:03<00:01, 39.85it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 70%|██████▉   | 158/227 [00:03<00:01, 41.35it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 72%|███████▏  | 163/227 [00:03<00:01, 41.70it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embeddi

 74%|███████▍  | 168/227 [00:04<00:01, 41.40it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 76%|███████▌  | 173/227 [00:04<00:01, 41.00it/s]

token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 78%|███████▊  | 178/227 [00:04<00:01, 41.34it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 81%|████████  | 183/227 [00:04<00:01, 41.08it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 83%|████████▎ | 188/227 [00:04<00:00, 40.51it/s]

token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embeddi

 85%|████████▌ | 193/227 [00:04<00:00, 40.12it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 87%|████████▋ | 198/227 [00:04<00:00, 40.34it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])


 89%|████████▉ | 203/227 [00:04<00:00, 39.63it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 92%|█████████▏| 208/227 [00:05<00:00, 39.84it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])


 93%|█████████▎| 212/227 [00:05<00:00, 39.46it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 96%|█████████▌| 217/227 [00:05<00:00, 39.54it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])


 97%|█████████▋| 221/227 [00:05<00:00, 39.64it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

100%|██████████| 227/227 [00:05<00:00, 40.77it/s]


token_embedding : torch.Size([28, 72, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 72, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding

  0%|          | 0/227 [00:00<?, ?it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])


  2%|▏         | 4/227 [00:00<00:05, 39.30it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

  4%|▍         | 9/227 [00:00<00:05, 39.26it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

  6%|▌         | 13/227 [00:00<00:05, 39.01it/s]

token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embedding : torch.Size([39, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([39, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

  7%|▋         | 17/227 [00:00<00:05, 38.96it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 10%|▉         | 22/227 [00:00<00:05, 39.89it/s]

token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 11%|█▏        | 26/227 [00:00<00:05, 38.45it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embeddi

 14%|█▎        | 31/227 [00:00<00:04, 39.27it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embeddi

 15%|█▌        | 35/227 [00:00<00:04, 38.89it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])


 17%|█▋        | 39/227 [00:00<00:04, 39.13it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([39, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([39, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])


 19%|█▉        | 43/227 [00:01<00:04, 39.35it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 21%|██        | 47/227 [00:01<00:04, 39.25it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 23%|██▎       | 52/227 [00:01<00:04, 40.09it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 25%|██▌       | 57/227 [00:01<00:04, 40.52it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 27%|██▋       | 62/227 [00:01<00:04, 41.10it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 30%|██▉       | 67/227 [00:01<00:03, 41.69it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 32%|███▏      | 72/227 [00:01<00:03, 41.32it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 34%|███▍      | 77/227 [00:01<00:03, 41.10it/s]

token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embeddi

 36%|███▌      | 82/227 [00:02<00:03, 39.94it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 38%|███▊      | 87/227 [00:02<00:03, 40.43it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 41%|████      | 92/227 [00:02<00:03, 41.01it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 43%|████▎     | 97/227 [00:02<00:03, 40.55it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 45%|████▍     | 102/227 [00:02<00:03, 41.21it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 47%|████▋     | 107/227 [00:02<00:02, 40.46it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 49%|████▉     | 112/227 [00:02<00:02, 40.54it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 52%|█████▏    | 117/227 [00:02<00:02, 40.42it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 54%|█████▎    | 122/227 [00:03<00:02, 40.14it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])


 56%|█████▌    | 127/227 [00:03<00:02, 40.04it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 58%|█████▊    | 132/227 [00:03<00:02, 40.34it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 60%|██████    | 137/227 [00:03<00:02, 40.07it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 63%|██████▎   | 142/227 [00:03<00:02, 40.50it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embeddi

 65%|██████▍   | 147/227 [00:03<00:01, 40.09it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 67%|██████▋   | 152/227 [00:03<00:01, 39.43it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 69%|██████▊   | 156/227 [00:03<00:01, 39.34it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 71%|███████   | 161/227 [00:04<00:01, 39.85it/s]

token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 73%|███████▎  | 166/227 [00:04<00:01, 40.33it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])


 75%|███████▌  | 171/227 [00:04<00:01, 41.10it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embeddi

 78%|███████▊  | 176/227 [00:04<00:01, 41.37it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embeddi

 80%|███████▉  | 181/227 [00:04<00:01, 40.92it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 82%|████████▏ | 186/227 [00:04<00:01, 40.52it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 84%|████████▍ | 191/227 [00:04<00:00, 40.64it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 86%|████████▋ | 196/227 [00:04<00:00, 40.32it/s]

token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 89%|████████▊ | 201/227 [00:04<00:00, 40.45it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])


 91%|█████████ | 206/227 [00:05<00:00, 39.76it/s]

token_embedding : torch.Size([45, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([45, 1, 240])
token_embedding : torch.Size([41, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([41, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 93%|█████████▎| 211/227 [00:05<00:00, 40.07it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 95%|█████████▌| 216/227 [00:05<00:00, 40.50it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 97%|█████████▋| 221/227 [00:05<00:00, 40.71it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

100%|██████████| 227/227 [00:05<00:00, 40.32it/s]

token_embedding : torch.Size([22, 72, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([27, 72, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])





token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

  0%|          | 0/227 [00:00<?, ?it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([39, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([39, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])


  2%|▏         | 4/227 [00:00<00:05, 37.27it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

  4%|▍         | 9/227 [00:00<00:05, 39.96it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embeddi

  6%|▌         | 13/227 [00:00<00:05, 37.47it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embeddi

  7%|▋         | 17/227 [00:00<00:05, 37.73it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 10%|▉         | 22/227 [00:00<00:05, 39.41it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 12%|█▏        | 27/227 [00:00<00:04, 40.12it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 14%|█▍        | 32/227 [00:00<00:04, 40.80it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 16%|█▋        | 37/227 [00:00<00:04, 41.42it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 19%|█▊        | 42/227 [00:01<00:04, 41.26it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 21%|██        | 47/227 [00:01<00:04, 40.89it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 23%|██▎       | 52/227 [00:01<00:04, 40.77it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 25%|██▌       | 57/227 [00:01<00:04, 39.92it/s]

token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 27%|██▋       | 61/227 [00:01<00:04, 39.42it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 29%|██▉       | 66/227 [00:01<00:04, 39.94it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embeddi

 31%|███       | 70/227 [00:01<00:03, 39.91it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 33%|███▎      | 75/227 [00:01<00:03, 40.23it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embeddi

 35%|███▌      | 80/227 [00:01<00:03, 40.20it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 37%|███▋      | 85/227 [00:02<00:03, 40.67it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embeddi

 40%|███▉      | 90/227 [00:02<00:03, 40.40it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 42%|████▏     | 95/227 [00:02<00:03, 39.82it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 44%|████▍     | 100/227 [00:02<00:03, 40.19it/s]

token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embeddi

 46%|████▋     | 105/227 [00:02<00:03, 40.23it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 48%|████▊     | 110/227 [00:02<00:02, 39.95it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])


 50%|█████     | 114/227 [00:02<00:02, 39.61it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 52%|█████▏    | 119/227 [00:02<00:02, 40.30it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 55%|█████▍    | 124/227 [00:03<00:02, 39.81it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embeddi

 56%|█████▋    | 128/227 [00:03<00:02, 39.71it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 58%|█████▊    | 132/227 [00:03<00:02, 39.63it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 60%|██████    | 137/227 [00:03<00:02, 39.94it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embedding : torch.Size([39, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([39, 1, 240])


 62%|██████▏   | 141/227 [00:03<00:02, 38.47it/s]

token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embeddi

 64%|██████▍   | 146/227 [00:03<00:02, 39.53it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 67%|██████▋   | 151/227 [00:03<00:01, 40.15it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 69%|██████▊   | 156/227 [00:03<00:01, 40.02it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])


 70%|███████   | 160/227 [00:04<00:01, 38.83it/s]

token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 73%|███████▎  | 165/227 [00:04<00:01, 39.68it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 75%|███████▍  | 170/227 [00:04<00:01, 40.14it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embeddi

 77%|███████▋  | 175/227 [00:04<00:01, 40.38it/s]

token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 79%|███████▉  | 180/227 [00:04<00:01, 40.90it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([41, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([41, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])


 81%|████████▏ | 185/227 [00:04<00:01, 39.66it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 84%|████████▎ | 190/227 [00:04<00:00, 40.27it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])


 86%|████████▌ | 195/227 [00:04<00:00, 40.45it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 88%|████████▊ | 200/227 [00:04<00:00, 40.38it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 90%|█████████ | 205/227 [00:05<00:00, 40.87it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 93%|█████████▎| 210/227 [00:05<00:00, 40.41it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 95%|█████████▍| 215/227 [00:05<00:00, 39.17it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])


 97%|█████████▋| 220/227 [00:05<00:00, 39.97it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 99%|█████████▉| 225/227 [00:05<00:00, 40.58it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])


100%|██████████| 227/227 [00:05<00:00, 40.10it/s]


token_embedding : torch.Size([22, 72, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([21, 72, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding

  0%|          | 0/227 [00:00<?, ?it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

  2%|▏         | 5/227 [00:00<00:05, 38.63it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embeddi

  4%|▍         | 10/227 [00:00<00:05, 40.07it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

  7%|▋         | 15/227 [00:00<00:05, 40.80it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embeddi

  9%|▉         | 20/227 [00:00<00:05, 40.79it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])


 11%|█         | 25/227 [00:00<00:04, 41.45it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 13%|█▎        | 30/227 [00:00<00:04, 42.02it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 15%|█▌        | 35/227 [00:00<00:04, 42.31it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 18%|█▊        | 40/227 [00:00<00:04, 42.94it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 20%|█▉        | 45/227 [00:01<00:04, 42.63it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 22%|██▏       | 50/227 [00:01<00:04, 42.71it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 24%|██▍       | 55/227 [00:01<00:04, 42.27it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 26%|██▋       | 60/227 [00:01<00:03, 42.14it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embeddi

 29%|██▊       | 65/227 [00:01<00:03, 41.47it/s]

token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])


 31%|███       | 70/227 [00:01<00:03, 41.13it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 33%|███▎      | 75/227 [00:01<00:03, 41.19it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 35%|███▌      | 80/227 [00:01<00:03, 40.73it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embeddi

 37%|███▋      | 85/227 [00:02<00:03, 40.92it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 40%|███▉      | 90/227 [00:02<00:03, 40.70it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embeddi

 42%|████▏     | 95/227 [00:02<00:03, 41.14it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 44%|████▍     | 100/227 [00:02<00:03, 40.36it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])


 46%|████▋     | 105/227 [00:02<00:03, 39.79it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 48%|████▊     | 109/227 [00:02<00:02, 39.73it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])


 50%|█████     | 114/227 [00:02<00:02, 40.49it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 55%|█████▍    | 124/227 [00:03<00:02, 41.10it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 57%|█████▋    | 129/227 [00:03<00:02, 40.91it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 59%|█████▉    | 134/227 [00:03<00:02, 39.95it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([41, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([41, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])


 61%|██████    | 139/227 [00:03<00:02, 39.43it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 63%|██████▎   | 144/227 [00:03<00:02, 39.36it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])


 65%|██████▌   | 148/227 [00:03<00:02, 39.17it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 69%|██████▊   | 156/227 [00:03<00:01, 38.17it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embeddi

 73%|███████▎  | 165/227 [00:04<00:01, 39.39it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 76%|███████▌  | 173/227 [00:04<00:01, 39.25it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 78%|███████▊  | 178/227 [00:04<00:01, 40.48it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embeddi

 81%|████████  | 183/227 [00:04<00:01, 39.01it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])


 83%|████████▎ | 188/227 [00:04<00:00, 39.84it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embeddi

 85%|████████▌ | 193/227 [00:04<00:00, 39.85it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])


 87%|████████▋ | 197/227 [00:04<00:00, 39.48it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 91%|█████████ | 206/227 [00:05<00:00, 38.56it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([39, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([39, 1, 240])
token_embeddi

 95%|█████████▌| 216/227 [00:05<00:00, 39.80it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 97%|█████████▋| 221/227 [00:05<00:00, 40.04it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embeddi

100%|█████████▉| 226/227 [00:05<00:00, 39.98it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])


100%|██████████| 227/227 [00:05<00:00, 40.36it/s]


token_embedding : torch.Size([24, 72, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 72, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding

  0%|          | 0/227 [00:00<?, ?it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

  2%|▏         | 4/227 [00:00<00:05, 38.01it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

  4%|▍         | 9/227 [00:00<00:05, 39.19it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

  6%|▌         | 13/227 [00:00<00:05, 38.78it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

  7%|▋         | 17/227 [00:00<00:05, 38.85it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 10%|▉         | 22/227 [00:00<00:05, 39.23it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 11%|█▏        | 26/227 [00:00<00:05, 39.24it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 14%|█▎        | 31/227 [00:00<00:04, 40.25it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 16%|█▌        | 36/227 [00:00<00:04, 39.27it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 18%|█▊        | 41/227 [00:01<00:04, 40.16it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 20%|██        | 46/227 [00:01<00:04, 40.12it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])


 22%|██▏       | 51/227 [00:01<00:04, 40.15it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 25%|██▍       | 56/227 [00:01<00:04, 39.95it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 27%|██▋       | 61/227 [00:01<00:04, 40.14it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 29%|██▉       | 66/227 [00:01<00:03, 40.41it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 31%|███▏      | 71/227 [00:01<00:03, 39.15it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 33%|███▎      | 75/227 [00:01<00:03, 38.75it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 35%|███▌      | 80/227 [00:02<00:03, 39.14it/s]

token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 37%|███▋      | 84/227 [00:02<00:03, 39.02it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embeddi

 39%|███▉      | 89/227 [00:02<00:03, 39.34it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 41%|████      | 93/227 [00:02<00:03, 39.40it/s]

token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])


 43%|████▎     | 97/227 [00:02<00:03, 39.26it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embeddi

 44%|████▍     | 101/227 [00:02<00:03, 38.69it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 47%|████▋     | 106/227 [00:02<00:03, 39.41it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 48%|████▊     | 110/227 [00:02<00:03, 38.85it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 50%|█████     | 114/227 [00:02<00:02, 39.16it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 52%|█████▏    | 118/227 [00:03<00:02, 38.46it/s]

token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])


 54%|█████▎    | 122/227 [00:03<00:02, 38.81it/s]

token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embeddi

 56%|█████▌    | 126/227 [00:03<00:02, 38.84it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 58%|█████▊    | 131/227 [00:03<00:02, 40.03it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embeddi

 60%|█████▉    | 136/227 [00:03<00:02, 40.90it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])


 62%|██████▏   | 141/227 [00:03<00:02, 40.22it/s]

token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 64%|██████▍   | 146/227 [00:03<00:02, 39.85it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embeddi

 67%|██████▋   | 151/227 [00:03<00:01, 39.96it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([45, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([45, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])


 68%|██████▊   | 155/227 [00:03<00:01, 39.04it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 70%|███████   | 159/227 [00:04<00:01, 38.98it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])


 72%|███████▏  | 164/227 [00:04<00:01, 40.17it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embeddi

 74%|███████▍  | 169/227 [00:04<00:01, 40.13it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 77%|███████▋  | 174/227 [00:04<00:01, 40.71it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 81%|████████  | 184/227 [00:04<00:01, 41.42it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 85%|████████▌ | 194/227 [00:04<00:00, 40.47it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 88%|████████▊ | 199/227 [00:05<00:00, 40.67it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 90%|████████▉ | 204/227 [00:05<00:00, 41.11it/s]

token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])


 92%|█████████▏| 209/227 [00:05<00:00, 40.95it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embeddi

 96%|█████████▋| 219/227 [00:05<00:00, 41.16it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

100%|██████████| 227/227 [00:05<00:00, 40.02it/s]


token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

  0%|          | 0/227 [00:00<?, ?it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])


  2%|▏         | 4/227 [00:00<00:05, 39.78it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

  4%|▍         | 9/227 [00:00<00:05, 41.06it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])


  6%|▌         | 14/227 [00:00<00:05, 39.82it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 10%|█         | 23/227 [00:00<00:05, 39.50it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 14%|█▎        | 31/227 [00:00<00:04, 39.29it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 17%|█▋        | 39/227 [00:00<00:04, 38.75it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 21%|██        | 48/227 [00:01<00:04, 38.89it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 25%|██▌       | 57/227 [00:01<00:04, 39.34it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 27%|██▋       | 62/227 [00:01<00:04, 40.83it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 32%|███▏      | 72/227 [00:01<00:03, 39.52it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 35%|███▌      | 80/227 [00:02<00:03, 38.57it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 39%|███▉      | 89/227 [00:02<00:03, 39.15it/s]

token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embedding : torch.Size([39, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([39, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 43%|████▎     | 97/227 [00:02<00:03, 39.21it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 47%|████▋     | 106/227 [00:02<00:03, 39.86it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([19, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([19, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embeddi

 51%|█████     | 115/227 [00:02<00:02, 40.19it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 55%|█████▌    | 125/227 [00:03<00:02, 40.81it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 57%|█████▋    | 130/227 [00:03<00:02, 40.38it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 62%|██████▏   | 140/227 [00:03<00:02, 40.87it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 66%|██████▌   | 150/227 [00:03<00:01, 40.13it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 70%|███████   | 160/227 [00:04<00:01, 40.03it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 73%|███████▎  | 165/227 [00:04<00:01, 39.91it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 77%|███████▋  | 175/227 [00:04<00:01, 39.73it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 81%|████████  | 184/227 [00:04<00:01, 40.27it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embeddi

 85%|████████▌ | 194/227 [00:04<00:00, 39.95it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 88%|████████▊ | 199/227 [00:05<00:00, 40.69it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 92%|█████████▏| 209/227 [00:05<00:00, 40.97it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 96%|█████████▋| 219/227 [00:05<00:00, 40.11it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

100%|██████████| 227/227 [00:05<00:00, 39.79it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([39, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([39, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi




token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

  2%|▏         | 4/227 [00:00<00:05, 39.59it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

  6%|▌         | 14/227 [00:00<00:05, 37.52it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embeddi

 10%|█         | 23/227 [00:00<00:05, 39.26it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([45, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([45, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 14%|█▎        | 31/227 [00:00<00:04, 39.32it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 16%|█▌        | 36/227 [00:00<00:04, 40.31it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 18%|█▊        | 41/227 [00:01<00:04, 39.50it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])


 20%|██        | 46/227 [00:01<00:04, 39.96it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embeddi

 25%|██▍       | 56/227 [00:01<00:04, 39.56it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 29%|██▊       | 65/227 [00:01<00:04, 40.08it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 31%|███       | 70/227 [00:01<00:03, 40.27it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 33%|███▎      | 75/227 [00:01<00:03, 40.75it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])


 35%|███▌      | 80/227 [00:01<00:03, 41.19it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 37%|███▋      | 85/227 [00:02<00:03, 41.39it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])


 40%|███▉      | 90/227 [00:02<00:03, 40.57it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 42%|████▏     | 95/227 [00:02<00:03, 40.07it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([41, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([41, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 44%|████▍     | 100/227 [00:02<00:03, 39.54it/s]

token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])


 46%|████▌     | 104/227 [00:02<00:03, 39.57it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embeddi

 48%|████▊     | 109/227 [00:02<00:02, 39.45it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])


 50%|█████     | 114/227 [00:02<00:02, 40.66it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 55%|█████▍    | 124/227 [00:03<00:02, 40.07it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 57%|█████▋    | 129/227 [00:03<00:02, 39.91it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 59%|█████▉    | 134/227 [00:03<00:02, 39.64it/s]

token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])


 61%|██████    | 138/227 [00:03<00:02, 38.97it/s]

token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 63%|██████▎   | 142/227 [00:03<00:02, 38.16it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])


 64%|██████▍   | 146/227 [00:03<00:02, 37.95it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 67%|██████▋   | 151/227 [00:03<00:01, 39.14it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])


 68%|██████▊   | 155/227 [00:03<00:01, 38.35it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 70%|███████   | 160/227 [00:04<00:01, 39.57it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])


 73%|███████▎  | 165/227 [00:04<00:01, 40.38it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 77%|███████▋  | 175/227 [00:04<00:01, 40.34it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 79%|███████▉  | 180/227 [00:04<00:01, 40.63it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embeddi

 81%|████████▏ | 185/227 [00:04<00:01, 39.58it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 83%|████████▎ | 189/227 [00:04<00:00, 38.89it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 85%|████████▌ | 193/227 [00:04<00:00, 39.08it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 87%|████████▋ | 198/227 [00:04<00:00, 39.96it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 89%|████████▉ | 202/227 [00:05<00:00, 39.65it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])


 91%|█████████ | 207/227 [00:05<00:00, 39.93it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 96%|█████████▌| 217/227 [00:05<00:00, 41.21it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

100%|██████████| 227/227 [00:05<00:00, 39.92it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi




epoch: 13, train loss: 0.0157, valid loss: 0.1928, elapsed time: 5.6893 sec


  2%|▏         | 4/227 [00:00<00:05, 37.72it/s]

token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

  6%|▌         | 13/227 [00:00<00:05, 39.16it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embeddi

  9%|▉         | 21/227 [00:00<00:05, 38.88it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 13%|█▎        | 29/227 [00:00<00:05, 38.45it/s]

token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 17%|█▋        | 38/227 [00:00<00:04, 39.59it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 21%|██        | 48/227 [00:01<00:04, 40.68it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 23%|██▎       | 53/227 [00:01<00:04, 38.95it/s]

token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embedding : torch.Size([39, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([39, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 28%|██▊       | 63/227 [00:01<00:04, 39.74it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 30%|██▉       | 68/227 [00:01<00:04, 39.34it/s]

token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 32%|███▏      | 73/227 [00:01<00:03, 39.76it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])


 34%|███▍      | 78/227 [00:01<00:03, 40.15it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 38%|███▊      | 87/227 [00:02<00:03, 38.42it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 43%|████▎     | 97/227 [00:02<00:03, 39.17it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 44%|████▍     | 101/227 [00:02<00:03, 39.28it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 47%|████▋     | 106/227 [00:02<00:03, 39.68it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 48%|████▊     | 110/227 [00:02<00:02, 39.55it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 51%|█████     | 115/227 [00:02<00:02, 39.81it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])


 53%|█████▎    | 120/227 [00:03<00:02, 40.36it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 57%|█████▋    | 130/227 [00:03<00:02, 39.77it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embeddi

 59%|█████▉    | 135/227 [00:03<00:02, 40.10it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([41, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([41, 1, 240])
token_embeddi

 62%|██████▏   | 140/227 [00:03<00:02, 38.47it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])


 64%|██████▍   | 145/227 [00:03<00:02, 40.04it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([20, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([20, 1, 240])
token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 68%|██████▊   | 155/227 [00:03<00:01, 39.80it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 70%|███████   | 160/227 [00:04<00:01, 40.72it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 73%|███████▎  | 165/227 [00:04<00:01, 39.92it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 75%|███████▍  | 170/227 [00:04<00:01, 40.23it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 77%|███████▋  | 175/227 [00:04<00:01, 40.02it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])


 79%|███████▉  | 180/227 [00:04<00:01, 39.56it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 83%|████████▎ | 189/227 [00:04<00:00, 40.45it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embeddi

 85%|████████▌ | 194/227 [00:04<00:00, 41.00it/s]

token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 88%|████████▊ | 199/227 [00:05<00:00, 41.46it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 90%|████████▉ | 204/227 [00:05<00:00, 41.32it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 92%|█████████▏| 209/227 [00:05<00:00, 40.92it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])


 94%|█████████▍| 214/227 [00:05<00:00, 41.04it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embeddi

 99%|█████████▊| 224/227 [00:05<00:00, 40.75it/s]

token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

100%|██████████| 227/227 [00:05<00:00, 39.88it/s]


token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

  0%|          | 0/227 [00:00<?, ?it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

  2%|▏         | 5/227 [00:00<00:05, 40.23it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

  4%|▍         | 10/227 [00:00<00:05, 40.97it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

  7%|▋         | 15/227 [00:00<00:05, 38.99it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])


  9%|▉         | 20/227 [00:00<00:05, 40.03it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([20, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([20, 1, 240])
token_embeddi

 11%|█         | 25/227 [00:00<00:04, 41.00it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embeddi

 13%|█▎        | 30/227 [00:00<00:04, 41.07it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embeddi

 15%|█▌        | 35/227 [00:00<00:04, 40.59it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 18%|█▊        | 40/227 [00:00<00:04, 40.34it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])


 20%|█▉        | 45/227 [00:01<00:04, 40.31it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 22%|██▏       | 50/227 [00:01<00:04, 39.73it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])


 24%|██▍       | 54/227 [00:01<00:04, 39.51it/s]

token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embeddi

 26%|██▌       | 59/227 [00:01<00:04, 40.06it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 28%|██▊       | 64/227 [00:01<00:04, 39.17it/s]

token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])


 30%|██▉       | 68/227 [00:01<00:04, 38.74it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 32%|███▏      | 72/227 [00:01<00:03, 38.77it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])


 33%|███▎      | 76/227 [00:01<00:04, 37.14it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 36%|███▌      | 81/227 [00:02<00:03, 38.48it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embeddi

 38%|███▊      | 86/227 [00:02<00:03, 39.64it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embeddi

 40%|███▉      | 90/227 [00:02<00:03, 38.39it/s]

token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])


 41%|████▏     | 94/227 [00:02<00:03, 38.71it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 44%|████▎     | 99/227 [00:02<00:03, 39.75it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 46%|████▌     | 104/227 [00:02<00:03, 40.52it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 48%|████▊     | 109/227 [00:02<00:02, 40.36it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 50%|█████     | 114/227 [00:02<00:02, 40.43it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 52%|█████▏    | 119/227 [00:02<00:02, 40.54it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 55%|█████▍    | 124/227 [00:03<00:02, 40.23it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])


 57%|█████▋    | 129/227 [00:03<00:02, 39.24it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 59%|█████▊    | 133/227 [00:03<00:02, 39.15it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 60%|██████    | 137/227 [00:03<00:02, 39.07it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 62%|██████▏   | 141/227 [00:03<00:02, 39.30it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 64%|██████▍   | 146/227 [00:03<00:02, 39.83it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 66%|██████▌   | 150/227 [00:03<00:01, 39.72it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])


 68%|██████▊   | 154/227 [00:03<00:01, 38.91it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 72%|███████▏  | 163/227 [00:04<00:01, 37.72it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([45, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([45, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 76%|███████▌  | 172/227 [00:04<00:01, 39.38it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([20, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([20, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 78%|███████▊  | 177/227 [00:04<00:01, 39.58it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 80%|████████  | 182/227 [00:04<00:01, 40.04it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])


 82%|████████▏ | 187/227 [00:04<00:01, 39.64it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 86%|████████▋ | 196/227 [00:04<00:00, 39.47it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([41, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([41, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 90%|█████████ | 205/227 [00:05<00:00, 40.19it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embeddi

 94%|█████████▍| 214/227 [00:05<00:00, 39.39it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 98%|█████████▊| 223/227 [00:05<00:00, 39.85it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

100%|██████████| 227/227 [00:05<00:00, 39.61it/s]


token_embedding : torch.Size([27, 72, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 72, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding

  0%|          | 0/227 [00:00<?, ?it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

  2%|▏         | 5/227 [00:00<00:05, 41.48it/s]

token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

  4%|▍         | 10/227 [00:00<00:05, 41.86it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

  7%|▋         | 15/227 [00:00<00:05, 41.62it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

  9%|▉         | 20/227 [00:00<00:05, 41.28it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embeddi

 11%|█         | 25/227 [00:00<00:04, 40.70it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 13%|█▎        | 30/227 [00:00<00:04, 40.21it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 15%|█▌        | 35/227 [00:00<00:04, 40.72it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 18%|█▊        | 40/227 [00:00<00:04, 40.17it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])


 20%|█▉        | 45/227 [00:01<00:04, 40.64it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 22%|██▏       | 50/227 [00:01<00:04, 40.81it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])


 24%|██▍       | 55/227 [00:01<00:04, 41.20it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 26%|██▋       | 60/227 [00:01<00:04, 40.36it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 29%|██▊       | 65/227 [00:01<00:03, 40.55it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 31%|███       | 70/227 [00:01<00:03, 40.60it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])


 33%|███▎      | 75/227 [00:01<00:03, 39.95it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 35%|███▌      | 80/227 [00:01<00:03, 40.21it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embeddi

 37%|███▋      | 85/227 [00:02<00:03, 39.90it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 40%|███▉      | 90/227 [00:02<00:03, 40.40it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embeddi

 42%|████▏     | 95/227 [00:02<00:03, 40.32it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 44%|████▍     | 100/227 [00:02<00:03, 40.12it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embeddi

 46%|████▋     | 105/227 [00:02<00:03, 40.37it/s]

token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([39, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([39, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 48%|████▊     | 110/227 [00:02<00:02, 40.20it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([45, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([45, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])


 51%|█████     | 115/227 [00:02<00:02, 39.79it/s]

positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_en

 53%|█████▎    | 120/227 [00:02<00:02, 39.97it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 55%|█████▍    | 124/227 [00:03<00:02, 39.49it/s]

token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])


 56%|█████▋    | 128/227 [00:03<00:02, 39.00it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 58%|█████▊    | 132/227 [00:03<00:02, 38.79it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])


 60%|██████    | 137/227 [00:03<00:02, 39.47it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 62%|██████▏   | 141/227 [00:03<00:02, 39.58it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 64%|██████▍   | 146/227 [00:03<00:02, 39.62it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 67%|██████▋   | 151/227 [00:03<00:01, 39.99it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 68%|██████▊   | 155/227 [00:03<00:01, 39.91it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 72%|███████▏  | 164/227 [00:04<00:01, 40.52it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 77%|███████▋  | 174/227 [00:04<00:01, 39.87it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 81%|████████  | 183/227 [00:04<00:01, 39.68it/s]

token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 83%|████████▎ | 188/227 [00:04<00:00, 39.72it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embeddi

 86%|████████▋ | 196/227 [00:04<00:00, 39.26it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 91%|█████████ | 206/227 [00:05<00:00, 39.18it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embeddi

 94%|█████████▍| 214/227 [00:05<00:00, 38.73it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 99%|█████████▊| 224/227 [00:05<00:00, 40.02it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

100%|██████████| 227/227 [00:05<00:00, 40.03it/s]


token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([26, 72, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 72, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding

  0%|          | 0/227 [00:00<?, ?it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embeddi

  2%|▏         | 5/227 [00:00<00:05, 39.48it/s]

token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

  4%|▍         | 9/227 [00:00<00:05, 39.60it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

  6%|▌         | 14/227 [00:00<00:05, 40.40it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

  8%|▊         | 19/227 [00:00<00:05, 37.99it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([45, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([45, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 11%|█         | 24/227 [00:00<00:05, 38.94it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 12%|█▏        | 28/227 [00:00<00:05, 38.94it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])


 15%|█▍        | 33/227 [00:00<00:04, 39.59it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 17%|█▋        | 38/227 [00:00<00:04, 40.56it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 19%|█▉        | 43/227 [00:01<00:04, 39.46it/s]

token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])


 21%|██        | 48/227 [00:01<00:04, 39.56it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 23%|██▎       | 52/227 [00:01<00:04, 39.55it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])


 25%|██▍       | 56/227 [00:01<00:04, 39.17it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embeddi

 26%|██▋       | 60/227 [00:01<00:04, 38.87it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])


 28%|██▊       | 64/227 [00:01<00:04, 38.58it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 33%|███▎      | 74/227 [00:01<00:03, 39.87it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 37%|███▋      | 83/227 [00:02<00:03, 39.91it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 39%|███▉      | 88/227 [00:02<00:03, 40.44it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 41%|████      | 93/227 [00:02<00:03, 40.72it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])


 43%|████▎     | 98/227 [00:02<00:03, 40.18it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 45%|████▌     | 103/227 [00:02<00:03, 40.14it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])


 48%|████▊     | 108/227 [00:02<00:02, 40.51it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 50%|████▉     | 113/227 [00:02<00:02, 40.35it/s]

token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embeddi

 52%|█████▏    | 118/227 [00:02<00:02, 39.61it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 54%|█████▍    | 123/227 [00:03<00:02, 40.21it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 56%|█████▋    | 128/227 [00:03<00:02, 39.96it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([39, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([39, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])


 59%|█████▊    | 133/227 [00:03<00:02, 40.11it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embeddi

 61%|██████    | 138/227 [00:03<00:02, 40.34it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 63%|██████▎   | 143/227 [00:03<00:02, 39.80it/s]

token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])


 65%|██████▍   | 147/227 [00:03<00:02, 39.51it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 67%|██████▋   | 151/227 [00:03<00:01, 39.12it/s]

token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 68%|██████▊   | 155/227 [00:03<00:01, 38.81it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 70%|███████   | 160/227 [00:04<00:01, 39.17it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 73%|███████▎  | 165/227 [00:04<00:01, 39.83it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 74%|███████▍  | 169/227 [00:04<00:01, 38.94it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 77%|███████▋  | 174/227 [00:04<00:01, 39.01it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 78%|███████▊  | 178/227 [00:04<00:01, 38.61it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 81%|████████  | 183/227 [00:04<00:01, 39.55it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 82%|████████▏ | 187/227 [00:04<00:01, 39.43it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 84%|████████▍ | 191/227 [00:04<00:00, 38.44it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 86%|████████▋ | 196/227 [00:04<00:00, 39.01it/s]

token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 88%|████████▊ | 200/227 [00:05<00:00, 39.03it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embeddi

 90%|████████▉ | 204/227 [00:05<00:00, 38.19it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])


 92%|█████████▏| 208/227 [00:05<00:00, 38.64it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 93%|█████████▎| 212/227 [00:05<00:00, 38.34it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])


 95%|█████████▌| 216/227 [00:05<00:00, 37.72it/s]

token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 97%|█████████▋| 220/227 [00:05<00:00, 37.88it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


100%|██████████| 227/227 [00:05<00:00, 39.46it/s]


token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([27, 72, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddin

  0%|          | 0/227 [00:00<?, ?it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])


  2%|▏         | 4/227 [00:00<00:05, 39.07it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

  4%|▎         | 8/227 [00:00<00:05, 38.72it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])


  5%|▌         | 12/227 [00:00<00:05, 38.31it/s]

token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

  7%|▋         | 16/227 [00:00<00:05, 38.76it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([45, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([45, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])


  9%|▉         | 20/227 [00:00<00:05, 37.78it/s]

token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 11%|█         | 24/227 [00:00<00:05, 37.90it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 12%|█▏        | 28/227 [00:00<00:05, 37.85it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embeddi

 15%|█▍        | 33/227 [00:00<00:04, 38.81it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])


 16%|█▋        | 37/227 [00:00<00:04, 39.15it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 18%|█▊        | 41/227 [00:01<00:04, 39.36it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])


 20%|█▉        | 45/227 [00:01<00:04, 39.44it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 22%|██▏       | 49/227 [00:01<00:04, 39.10it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])


 23%|██▎       | 53/227 [00:01<00:04, 39.05it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 25%|██▌       | 57/227 [00:01<00:04, 39.16it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 27%|██▋       | 62/227 [00:01<00:04, 39.81it/s]

token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 29%|██▉       | 66/227 [00:01<00:04, 39.20it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 31%|███       | 70/227 [00:01<00:04, 38.80it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embeddi

 33%|███▎      | 74/227 [00:01<00:03, 38.56it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([41, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([41, 1, 240])


 34%|███▍      | 78/227 [00:02<00:03, 37.89it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 36%|███▌      | 82/227 [00:02<00:03, 38.32it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])


 38%|███▊      | 86/227 [00:02<00:03, 38.04it/s]

token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embeddi

 40%|███▉      | 90/227 [00:02<00:03, 38.16it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])


 42%|████▏     | 95/227 [00:02<00:03, 39.04it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 45%|████▌     | 103/227 [00:02<00:03, 37.64it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 49%|████▉     | 111/227 [00:02<00:03, 37.99it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 53%|█████▎    | 120/227 [00:03<00:02, 38.22it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 56%|█████▋    | 128/227 [00:03<00:02, 37.41it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 59%|█████▊    | 133/227 [00:03<00:02, 38.37it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 60%|██████    | 137/227 [00:03<00:02, 38.44it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])


 63%|██████▎   | 142/227 [00:03<00:02, 38.93it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embeddi

 64%|██████▍   | 146/227 [00:03<00:02, 38.03it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])


 66%|██████▌   | 150/227 [00:03<00:02, 37.44it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 68%|██████▊   | 154/227 [00:04<00:01, 37.69it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 70%|██████▉   | 158/227 [00:04<00:01, 37.21it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 71%|███████▏  | 162/227 [00:04<00:01, 37.70it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([38, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([38, 1, 240])


 73%|███████▎  | 166/227 [00:04<00:01, 37.35it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 75%|███████▍  | 170/227 [00:04<00:01, 37.94it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])


 77%|███████▋  | 174/227 [00:04<00:01, 36.93it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 79%|███████▉  | 179/227 [00:04<00:01, 38.23it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])


 81%|████████  | 183/227 [00:04<00:01, 36.38it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embedding : torch.Size([39, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([39, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embeddi

 84%|████████▍ | 191/227 [00:05<00:00, 37.06it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 88%|████████▊ | 200/227 [00:05<00:00, 38.44it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 92%|█████████▏| 208/227 [00:05<00:00, 38.79it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 93%|█████████▎| 212/227 [00:05<00:00, 39.12it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([21, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([21, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 95%|█████████▌| 216/227 [00:05<00:00, 38.68it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 97%|█████████▋| 220/227 [00:05<00:00, 38.79it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

100%|██████████| 227/227 [00:05<00:00, 38.32it/s]


token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 72, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 72, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding

  0%|          | 0/227 [00:00<?, ?it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])


  2%|▏         | 4/227 [00:00<00:06, 35.22it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

  4%|▎         | 8/227 [00:00<00:05, 37.34it/s]

token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])


  5%|▌         | 12/227 [00:00<00:05, 36.28it/s]

token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

  7%|▋         | 17/227 [00:00<00:05, 38.24it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])


  9%|▉         | 21/227 [00:00<00:05, 38.72it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 11%|█▏        | 26/227 [00:00<00:04, 40.22it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])


 14%|█▎        | 31/227 [00:00<00:04, 39.54it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

 15%|█▌        | 35/227 [00:00<00:04, 38.45it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([40, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([40, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])


 17%|█▋        | 39/227 [00:01<00:04, 38.33it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi

 19%|█▉        | 43/227 [00:01<00:04, 38.28it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])


 21%|██        | 48/227 [00:01<00:04, 39.12it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embeddi

 23%|██▎       | 52/227 [00:01<00:04, 38.49it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embeddi

 25%|██▍       | 56/227 [00:01<00:04, 38.83it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])


 26%|██▋       | 60/227 [00:01<00:04, 39.11it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 29%|██▊       | 65/227 [00:01<00:04, 40.04it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([41, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([41, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 30%|███       | 69/227 [00:01<00:04, 38.95it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 32%|███▏      | 73/227 [00:01<00:03, 38.56it/s]

token_embedding : torch.Size([44, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([44, 1, 240])
token_embedding : torch.Size([39, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([39, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])


 34%|███▍      | 77/227 [00:02<00:03, 37.71it/s]

token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([39, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([39, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 36%|███▌      | 81/227 [00:02<00:03, 37.70it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 37%|███▋      | 85/227 [00:02<00:03, 38.21it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([45, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([45, 1, 240])
token_embeddi

 39%|███▉      | 89/227 [00:02<00:03, 37.29it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])


 41%|████▏     | 94/227 [00:02<00:03, 38.31it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embeddi

 43%|████▎     | 98/227 [00:02<00:03, 38.37it/s]

token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 45%|████▍     | 102/227 [00:02<00:03, 38.52it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embeddi

 47%|████▋     | 106/227 [00:02<00:03, 38.43it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])


 48%|████▊     | 110/227 [00:02<00:03, 38.42it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 50%|█████     | 114/227 [00:02<00:02, 37.85it/s]

token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])


 52%|█████▏    | 118/227 [00:03<00:02, 37.11it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embeddi

 54%|█████▎    | 122/227 [00:03<00:02, 37.56it/s]

token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])


 56%|█████▌    | 126/227 [00:03<00:02, 37.02it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 57%|█████▋    | 130/227 [00:03<00:02, 37.26it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])


 59%|█████▉    | 134/227 [00:03<00:02, 37.26it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 61%|██████    | 138/227 [00:03<00:02, 37.63it/s]

token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([20, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([20, 1, 240])


 63%|██████▎   | 143/227 [00:03<00:02, 38.80it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([23, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([23, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embeddi

 65%|██████▍   | 147/227 [00:03<00:02, 38.06it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])


 67%|██████▋   | 152/227 [00:03<00:01, 38.87it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embeddi

 70%|███████   | 160/227 [00:04<00:01, 38.64it/s]

token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([33, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([33, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 74%|███████▍  | 168/227 [00:04<00:01, 38.11it/s]

token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([34, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([34, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([37, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([37, 1, 240])
token_embeddi

 78%|███████▊  | 176/227 [00:04<00:01, 37.66it/s]

token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([22, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([22, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([36, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([36, 1, 240])
token_embedding : torch.Size([35, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([35, 1, 240])
token_embeddi

 80%|███████▉  | 181/227 [00:04<00:01, 38.81it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi

 82%|████████▏ | 186/227 [00:04<00:01, 38.96it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])


 84%|████████▍ | 191/227 [00:04<00:00, 39.48it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 86%|████████▋ | 196/227 [00:05<00:00, 39.86it/s]

token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])


 88%|████████▊ | 200/227 [00:05<00:00, 39.49it/s]

token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embeddi

 90%|█████████ | 205/227 [00:05<00:00, 39.72it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])


 92%|█████████▏| 209/227 [00:05<00:00, 39.60it/s]

token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embedding : torch.Size([28, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([28, 1, 240])
token_embeddi

 96%|█████████▌| 217/227 [00:05<00:00, 38.50it/s]

token_embedding : torch.Size([25, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([25, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([26, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([26, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embeddi

100%|██████████| 227/227 [00:05<00:00, 38.47it/s]

token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([24, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([24, 1, 240])
token_embedding : torch.Size([27, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([27, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embeddi




token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([30, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([30, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([32, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([32, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([29, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([29, 1, 240])
token_embedding : torch.Size([31, 128, 240])
positional_encoding : torch.Size([5000, 1, 240])
positional_encoding : torch.Size([31, 1, 240])
token_embeddi