In [34]:
import os
import sys
import torch
import numpy as np
from torch.utils.data import DataLoader
from torchmetrics.classification import MultilabelAccuracy
from torchmetrics.classification import MultilabelAUROC
import pytorch_lightning as pl
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
import torch.nn.functional as F
sys.path.append('../..')
from multi_modal_heart.model.ecg_net_attention import ECGEncoder,ECGAttentionAE
from multi_modal_heart.model.ecg_net import ECGAE
from multi_modal_heart.model.ecg_net import BenchmarkClassifier
from multi_modal_heart.ECG.ecg_dataset import ECGDataset
from torchmetrics import Accuracy

class LitClassifier(pl.LightningModule):
    def __init__(self,encoder,input_dim,num_classes=2,lr=1e-3,freeze_encoder=False):
        super().__init__()
        self.lr =lr
        self.freeze_encoder = freeze_encoder
        self.encoder = encoder
        if self.freeze_encoder:
            self.encoder.eval()
            for param in self.encoder.parameters():
                param.requires_grad = False
        self.accu_metric = Accuracy(task="multiclass",num_classes=num_classes)
        #### add classifier if use benchmark classifier
        self.downsteam_net = BenchmarkClassifier(input_size=input_dim,hidden_size=128,output_size=num_classes)
    def forward(self, x, mask=None):
        latent_code = self.encoder.get_features_after_pooling(x,mask=None)
        print (latent_code.shape)
        return self.downsteam_net(latent_code)
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        if self.freeze_encoder:
            self.encoder.eval()
            with torch.no_grad():
                latent_code = self.encoder.get_features_after_pooling(x,mask=None)
        else:
            latent_code = self.encoder.get_features_after_pooling(x,mask=None)

        y_hat = self.downsteam_net(latent_code)
        print (y_hat.shape)
        loss = F.cross_entropy(y_hat, y)
        self.log("train_loss", loss,prog_bar=True)

        return loss
    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x,mask=None)
        loss = F.cross_entropy(logits, y)
        self.log("test_loss", loss)
        preds = torch.argmax(logits, dim=1)
        self.accu_metric.update(preds, y)
        return loss
    def on_test_epoch_end(self):
        self.log("test_acc", self.accu_metric.compute())
    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x,mask=None)
        loss = F.cross_entropy(y_hat, y)
        self.log("val_loss", loss)
        return loss
    def configure_optimizers(self):
        return torch.optim.AdamW(filter(lambda p: p.requires_grad, self.parameters()), lr=self.lr, weight_decay=1e-4)
    
def print_result(probs,super_classes_labels, topk=1):
    probs, label_indices = torch.topk(probs, topk)
    probs = probs.tolist()
    label_indices = label_indices.tolist()
    for prob, idx in zip(probs, label_indices):
        label = super_classes_labels[idx]
        print(f'{label} ({idx}):', round(prob, 4))
def calc_hamming_score(y_true, y_pred):
    return (
        (y_true & y_pred).sum(axis=1) / (y_true | y_pred).sum(axis=1)
    ).mean()    
# ecg_net = ECGAttentionAE(num_leads=12, time_steps=1024, z_dims=512, linear_out=512, downsample_factor=5, base_feature_dim=4,if_VAE=False,
#                          use_attention_pool=False,no_linear_in_E=True, apply_lead_mask=False, no_time_attention=False)
# classification_net = LitClassifier(encoder=ecg_net.encoder,input_dim=512,num_classes=5)
# # checkpoint_path  ="../../log_finetune/ECG_attention_512_raw_no_attention_pool_no_linear_abl_no_time_attention_ms_resnet/checkpoints/last-v3.ckpt"
# # checkpoint_path  ="../../log_finetune/ECG_attention_512_raw_no_attention_pool_no_linear_ms_resnet_ECG2Text/checkpoints/last-v5.ckpt"
# checkpoint_path = "../../log_finetune/ECG_attention_512_raw_no_attention_pool_no_linear_ms_resnet/checkpoints/last-v8.ckpt"
# print (torch.load(checkpoint_path)["state_dict"].keys())
# mm_checkpoint = torch.load(checkpoint_path)["state_dict"]
# encoder_params = {(".").join(key.split(".")[1:]):value for key, value in mm_checkpoint.items() if str(key).startswith("encoder")}
# classification_params = {(".").join(key.split(".")[1:]):value for key, value in mm_checkpoint.items() if str(key).startswith("downsteam_net")}
# classification_net.encoder.load_state_dict(encoder_params)
# classification_net.downsteam_net.load_state_dict(classification_params)



In [None]:
## load MI data from UKB
from scipy.stats import zscore

hf_data_path = "/home/engs2522/project/multi-modal-heart/multi_modal_heart/toolkits/ukb/non_imaging_information/HF/batched_ecg_full_wave.npy"
healthy_data_path = "/home/engs2522/project/multi-modal-heart/multi_modal_heart/toolkits/ukb/non_imaging_information/non_CVD/batched_ecg_full_wave.npy"
time_steps = 1024
hf_data = np.load(hf_data_path)
healthy_data = np.load(healthy_data_path)
hf_data = zscore(hf_data,axis=-1)
healthy_data = zscore(healthy_data,axis=-1)
hf_data = np.nan_to_num(hf_data)
healthy_data = np.nan_to_num(healthy_data)


## pad the data to 608
pad_num = (time_steps-healthy_data.shape[-1])//2
hf_data = np.pad(hf_data,((0,0),(0,0),(pad_num,pad_num)),"constant",constant_values=0)
healthy_data = np.pad(healthy_data,((0,0),(0,0),(pad_num,pad_num)),"constant",constant_values=0)


labels = np.concatenate([np.ones(hf_data.shape[0]),np.zeros(healthy_data.shape[0])])
data = np.concatenate([hf_data,healthy_data],axis=0)
print (data.shape)

## split the data into train validate and test, 40% for train, 10% for validate, 50% for test
from sklearn.model_selection import train_test_split
X_trainval, X_test, y_trainval, y_test = train_test_split(data, labels, test_size=0.5, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_trainval, y_trainval, test_size=0.1, random_state=42)
print ('num of training data:{}, heartfailure ratio:{}'.format(X_train.shape[0],y_train.sum()/y_train.shape[0]))

print ('num of validation data:{}, heartfailure ratio:{}'.format(X_val.shape[0],y_val.sum()/y_val.shape[0]))

print ('num of test data:{}, heartfailure ratio:{}'.format(X_test.shape[0],y_test.sum()/y_test.shape[0]))


In [None]:
import torch
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
import pytorch_lightning as pl

batch_size  = 32
tensor_x = torch.Tensor(X_train) # transform to torch tensor
tensor_y = torch.Tensor(y_train).long()

my_train_dataset = TensorDataset(tensor_x,tensor_y) # create your datset
my_dataloader = DataLoader(my_train_dataset,batch_size=batch_size) # create your dataloader

## validation data
tensor_x = torch.Tensor(X_val) # transform to torch tensor
tensor_y = torch.Tensor(y_val).long()
my_val_dataset = TensorDataset(tensor_x,tensor_y) # create your datset
my_val_dataloader = DataLoader(my_val_dataset,batch_size=batch_size) # create your dataloader

## test data
tensor_x = torch.Tensor(X_test) # transform to torch tensor
tensor_y = torch.Tensor(y_test).long()
my_test_dataset = TensorDataset(tensor_x,tensor_y) # create your datset
my_test_dataloader = DataLoader(my_test_dataset,batch_size=batch_size) # create your dataloader

In [None]:
class FineTuneLearningRateFinder(pl.callbacks.LearningRateFinder):
    def __init__(self, milestones, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.milestones = milestones

    def on_fit_start(self, *args, **kwargs):
        return

    def on_train_epoch_start(self, trainer, pl_module):
        if trainer.current_epoch in self.milestones or trainer.current_epoch == 0:
            self.lr_find(trainer, pl_module)

In [35]:
import torch
import sys
sys.path.append('../../')
from multi_modal_heart.model.ecg_net import ECGAE
import torch.nn as nn
use_median_wave = False
pl.seed_everything(42)
model_name = "ECG_attention_512_raw_no_attention_pool_no_linear_ms_resnet"
if model_name=="resnet1d101_512+benchmark_classifier_ms_resnet":
    ecg_net= ECGAE(encoder_type="resnet1d101",in_channels=12,ECG_length=time_steps,decoder_type="ms_resnet",
                    embedding_dim=256,latent_code_dim=512,
                    add_time=False,
                    encoder_mha = False,
                    apply_method="",
                    decoder_outdim=12)
    checkpoint_path = "/home/engs2522/project/multi-modal-heart/log_finetune/resnet1d101_512+benchmark_classifier_ms_resnet/checkpoints/checkpoint_best_loss.ckpt"
elif model_name=="ECG_attention_512_raw_no_attention_pool_no_linear_ms_resnet":
    ecg_net  = ECGAttentionAE(num_leads=12, time_steps=time_steps, z_dims=512, linear_out=512, downsample_factor=5, base_feature_dim=4,if_VAE=False,use_attention_pool=False,
                         no_linear_in_E=False, apply_lead_mask=False)
    checkpoint_path = "../../log_finetune/ECG_attention_512_raw_no_attention_pool_no_linear_ms_resnet/checkpoints/checkpoint_best_loss.ckpt"
else:
    raise NotImplementedError

num_classes = 2 ## for binary classification
freeze_encoder = False
train_from_scratch = True
classification_net = LitClassifier(encoder=ecg_net.encoder,input_dim=512,num_classes=num_classes,lr=1e-3,freeze_encoder=freeze_encoder)
# resnet_checkpoint = '../../log_finetune/resnet1d101_512+benchmark_classifier_ms_resnet/checkpoints/epoch=23-val_auroc:benchmark_classifier/val_macro_auc=0.91.ckpt'
checkpoint = torch.load(checkpoint_path)["state_dict"]
encoder_params = {(".").join(key.split(".")[1:]):value for key, value in checkpoint.items() if str(key).startswith("encoder")}
classification_params = {(".").join(key.split(".")[1:]):value for key, value in checkpoint.items() if str(key).startswith("downsteam_net")}
if not train_from_scratch:
    classification_net.encoder.load_state_dict(encoder_params)


Global seed set to 42


In [32]:
import torch
import sys
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
import os
tb_logger = TensorBoardLogger( f"./finetune_on_HF", name=model_name, version="")  
checkpoint_dir  = os.path.join(tb_logger.log_dir,"checkpoints")

checkpoint_callback_best_loss_min = pl.callbacks.ModelCheckpoint(dirpath=checkpoint_dir, 
                                                    filename='checkpoint_best_loss',
                                                    save_top_k=1, monitor="val_loss"
                                                    , mode='min',save_last=True)


callbacks=[
    # FineTuneLearningRateFinder(milestones=[5, 10],min_lr=1e-5, max_lr=1e-3, 
    #                             mode='linear', early_stop_threshold=4.0),
    checkpoint_callback_best_loss_min
    ]


trainer = pl.Trainer(accelerator="gpu",
                    devices=1, max_epochs=400,
                    logger=tb_logger,log_every_n_steps=1,check_val_every_n_epoch = 1,
                    callbacks=callbacks,
                    )
trainer.fit(classification_net, train_dataloaders=my_dataloader,val_dataloaders=my_val_dataloader)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name          | Type                | Params
------------------------------------------------------
0 | encoder       | ECGEncoder          | 3.6 M 
1 | accu_metric   | MulticlassAccuracy  | 0     
2 | downsteam_net | BenchmarkClassifier | 67.2 K
------------------------------------------------------
3.6 M     Trainable params
0         Non-trainable params
3.6 M     Total params
14.575    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])


  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])
torch.Size([32, 2])


Validation: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])


`Trainer.fit` stopped: `max_epochs=400` reached.


In [33]:
result = trainer.test(classification_net,my_test_dataloader,ckpt_path="best")


Restoring states from the checkpoint path at /home/engs2522/project/multi-modal-heart/multi_modal_heart/tasks/finetune_on_HF/ECG_attention_512_raw_no_attention_pool_no_linear_ms_resnet/checkpoints/checkpoint_best_loss-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Loaded model weights from the checkpoint at /home/engs2522/project/multi-modal-heart/multi_modal_heart/tasks/finetune_on_HF/ECG_attention_512_raw_no_attention_pool_no_linear_ms_resnet/checkpoints/checkpoint_best_loss-v3.ckpt
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

attention AE is enabled
torch.Size([32, 512])
attention AE is enabled
torch.Size([32, 512])
attention AE is enabled
torch.Size([32, 512])
attention AE is enabled
torch.Size([32, 512])
attention AE is enabled
torch.Size([32, 512])
attention AE is enabled
torch.Size([32, 512])
attention AE is enabled
torch.Size([32, 512])
attention AE is enabled
torch.Size([32, 512])
attention AE is enabled
torch.Size([32, 512])
attention AE is enabled
torch.Size([32, 512])
