## Dataset

In [1]:
import numpy as np
import struct
import matplotlib.pyplot as plt
from tqdm import tqdm
import math
def load_mnist_images(filename):
    with open(filename, 'rb') as f:
        # Leggi intestazione: magic number, numero immagini, righe, colonne
        magic, num_images, rows, cols = struct.unpack(">IIII", f.read(16))
        # Leggi tutti i pixel e convertili in array numpy
        images = np.frombuffer(f.read(), dtype=np.uint8)
        # Ridimensiona l'array in (num_images, rows, cols)
        images = images.reshape((num_images, rows, cols))
    return images

def load_mnist_labels(filename):
    with open(filename, 'rb') as f:
        magic, num_labels = struct.unpack(">II", f.read(8))
        labels = np.frombuffer(f.read(), dtype=np.uint8)
    return labels
#-------------- Data Extraction ---------------------------

train_images = load_mnist_images('MNIST/train-images-idx3-ubyte')
train_labels = load_mnist_labels('MNIST/train-labels-idx1-ubyte')

test_images = load_mnist_images('MNIST/t10k-images.idx3-ubyte')
test_labels = load_mnist_labels('MNIST/t10k-labels.idx1-ubyte')

#--------------- Train data manipulation ------------------
print(train_images.shape)  # (60000, 28, 28)
print(train_labels.shape)  # (60000,)
one_hot_labels = np.zeros(train_labels.shape[0]*10).reshape((train_labels.shape[0]),10)
for i in range(len(train_labels)):
    one_hot_labels[i][train_labels[i]]=1
train_labels = one_hot_labels
print(train_labels.shape) # (60000,10)

#--------------- Test data manipulation -------------------
print(test_images.shape)  # (10000, 28, 28)
print(test_labels.shape)  # (10000,)
one_hot_labels = np.zeros(test_labels.shape[0]*10).reshape((test_labels.shape[0]),10)
for i in range(len(test_labels)):
    one_hot_labels[i][test_labels[i]]=1
test_labels = one_hot_labels
print(test_labels.shape) # (10000,10)

(60000, 28, 28)
(60000,)
(60000, 10)
(10000, 28, 28)
(10000,)
(10000, 10)


## CNN - PyTorch

The PyTorch model will be used as a reference to compute the weights since it's the fastest in training and the least prone to errors. If everything is written well, both slow and fast implementations of a CNN in numpy will give the same result, since the weights are the same.

### Model and Dataset Declaration with Training

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import time
from tqdm import tqdm 

# 1.------------------ CNN declaration -------------------

class SimpleCNN(nn.Module):
    def __init__(self, num_classes=10):
        super(SimpleCNN, self).__init__()

        # --------- Convolutional Layers ------------
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=2, stride=2, padding=0)
        self.relu1 = nn.ReLU()

        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=2, stride=2, padding=1)
        self.relu2 = nn.ReLU()

        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=2, stride=2, padding=0)
        self.relu3 = nn.ReLU()
        # ---------- Flatten to become MLP's input -----------
        self.flatten = nn.Flatten()
        fc_input_size = 128 * 4 * 4
        # ---------- Multi Layer Perceptron ---------------
        # Only one hidden layer for classification
        self.fc1 = nn.Linear(in_features=fc_input_size, out_features=250)
        self.relu4 = nn.ReLU()
        self.fc2 = nn.Linear(in_features=250, out_features=num_classes)

    def forward(self, x):
        # First convolution: from 1x1x28x28 to 1x32x14x14
        x = self.conv1(x)
        x = self.relu1(x)
        # Second Convolution: from 1x32x14x14 to 1x64x8x8
        x = self.conv2(x)
        x = self.relu2(x)
        print(x.shape)
        # Third Convolution: from 1x64x8x8 to 1x128x4x4
        x = self.conv3(x)
        x = self.relu3(x)
        print(x.shape)
        # Flatten
        x = self.flatten(x)
        print(x.shape)
        # MLP
        x = self.fc1(x)
        x = self.relu4(x)
        x = self.fc2(x)

        return x

# 2.------------------ CNN's Dataset declaration ----------------------

class CNNDataset(Dataset):
    def __init__(self, digits, labels, transform=None):
        assert len(digits) == len(labels), "Number of digits and labels doesn't match"
        self.digits = digits
        self.labels = labels

    def __len__(self):
        return len(self.digits)

    def __getitem__(self, idx):
        digit = self.digits[idx]
        label = self.labels[idx]
        digit = digit.unsqueeze(0) # Needed operation to add the dimension of greyscale images (28,28) -> (1,28,28)
        return digit, label

tri = torch.from_numpy(train_images).float() / 255
trl = torch.from_numpy(train_labels).float()
tsi = torch.from_numpy(test_images).float() / 255
tsl = torch.from_numpy(test_labels).float()

train_dataset = CNNDataset(tri,trl)
test_dataset = CNNDataset(tsi,tsl)

batch_size = 128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# 3.------ Training Setup ---------------

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f"device: {device}")

model = SimpleCNN(num_classes=10).to(device)

# Loss definition
criterion = nn.CrossEntropyLoss() 

# Optimisation definition
learning_rate = 0.001
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

num_epochs = 5 

# 4.------- cycle training ------

print("\nStarting Training...")
for epoch in range(num_epochs):

    model.train() 

    running_loss = 0.0
    start_time = time.time()
    #tqdm is module used to have a progress bar
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)

    for inputs, labels in progress_bar:

        # move data on the device
        inputs, labels = inputs.to(device), labels.to(device)

        # make all gradients zero to avoid learning on gradients of previous steps
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs) 
        # loss computation
        loss = criterion(outputs, labels)

        # Backward pass: compute the gradients
        loss.backward()

        # Weights update
        optimizer.step()

        # Update the loss
        running_loss += loss.item() * inputs.size(0) # multiply for batch size to obtain the correct mean

        # Update the progress bar
        progress_bar.set_postfix(loss=f"{loss.item():.4f}")

    # Epochs' mean loss computation
    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_time = time.time() - start_time

    print(f"Epoch {epoch+1}/{num_epochs} - Tempo: {epoch_time:.2f}s - Training Loss: {epoch_loss:.4f}")

    # --- Test evaluation (after every epoch) ---
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad(): # Disable gradient computation (we don't need gradients since we don't want to update the model in this phase)
        i=0
        for inputs, labels in test_loader:
            if i >= 1:
                continue
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs.data, 1) # Obtain index with the maximum probability (it is our result)
            _,labels = torch.max(labels,1) # same for the test labels
            total += labels.size(0)
            correct += (predicted==labels).sum().item()
            i+=1

    avg_test_loss = test_loss / len(test_loader.dataset)
    accuracy = 100 * correct / total
    print(f"Epoch {epoch+1}/{num_epochs} - Test Loss: {avg_test_loss:.4f} - Test Accuracy: {accuracy:.2f}%")


print("\nTraining Complete.")
#2m 9.4 secondi per avere un'epoca con cuda
# save the model
torch.save(model.state_dict(), 'simple_cnn_mnist.pth')

  tri = torch.from_numpy(train_images).float() / 255


device: cuda

Starting Training...


Epoch 1/5:   0%|          | 0/469 [00:00<?, ?it/s]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:   1%|▏         | 6/469 [00:01<01:11,  6.48it/s, loss=2.1473]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
t

Epoch 1/5:   4%|▍         | 18/469 [00:01<00:21, 20.67it/s, loss=1.4516]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
t

Epoch 1/5:   5%|▌         | 24/469 [00:01<00:16, 27.02it/s, loss=0.7056]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:   7%|▋         | 34/469 [00:01<00:13, 33.34it/s, loss=0.9996]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:   9%|▉         | 44/469 [00:02<00:10, 39.19it/s, loss=0.4579]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  12%|█▏        | 54/469 [00:02<00:09, 42.02it/s, loss=0.3957]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  14%|█▎        | 64/469 [00:02<00:09, 44.14it/s, loss=0.4558]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  15%|█▍        | 69/469 [00:02<00:09, 43.58it/s, loss=0.3413]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  17%|█▋        | 79/469 [00:02<00:09, 43.02it/s, loss=0.3713]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  18%|█▊        | 84/469 [00:03<00:08, 43.09it/s, loss=0.3157]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  20%|█▉        | 93/469 [00:03<00:11, 33.61it/s, loss=0.4416]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  22%|██▏       | 101/469 [00:03<00:11, 32.89it/s, loss=0.3295]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  22%|██▏       | 105/469 [00:03<00:11, 32.97it/s, loss=0.3450]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  24%|██▍       | 113/469 [00:03<00:11, 30.69it/s, loss=0.3277]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  26%|██▌       | 121/469 [00:04<00:10, 32.74it/s, loss=0.3222]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  27%|██▋       | 125/469 [00:04<00:10, 33.61it/s, loss=0.3102]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  28%|██▊       | 133/469 [00:04<00:10, 31.29it/s, loss=0.2458]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  30%|███       | 141/469 [00:04<00:09, 33.23it/s, loss=0.2991]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  32%|███▏      | 149/469 [00:04<00:09, 33.07it/s, loss=0.2526]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  33%|███▎      | 157/469 [00:05<00:08, 34.69it/s, loss=0.3552]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  34%|███▍      | 161/469 [00:05<00:08, 35.49it/s, loss=0.2572]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  36%|███▌      | 170/469 [00:05<00:08, 36.33it/s, loss=0.2576]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  38%|███▊      | 178/469 [00:05<00:08, 34.93it/s, loss=0.2361]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  39%|███▉      | 182/469 [00:06<00:08, 34.48it/s, loss=0.1413]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  41%|████      | 190/469 [00:06<00:08, 34.78it/s, loss=0.2575]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  41%|████▏     | 194/469 [00:06<00:10, 26.57it/s, loss=0.2570]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  42%|████▏     | 197/469 [00:06<00:11, 24.03it/s, loss=0.2163]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  44%|████▎     | 205/469 [00:06<00:09, 26.80it/s, loss=0.3126]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  45%|████▍     | 211/469 [00:07<00:10, 25.41it/s, loss=0.1641]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  46%|████▌     | 214/469 [00:07<00:10, 24.07it/s, loss=0.1357]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  47%|████▋     | 220/469 [00:07<00:10, 24.16it/s, loss=0.1681]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  48%|████▊     | 226/469 [00:07<00:09, 25.82it/s, loss=0.2183]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  49%|████▉     | 232/469 [00:07<00:08, 26.55it/s, loss=0.2872]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  51%|█████     | 238/469 [00:08<00:08, 27.46it/s, loss=0.2027]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  52%|█████▏    | 244/469 [00:08<00:08, 26.90it/s, loss=0.1870]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  53%|█████▎    | 250/469 [00:08<00:08, 26.45it/s, loss=0.1679]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  55%|█████▍    | 257/469 [00:08<00:07, 28.67it/s, loss=0.2490]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  57%|█████▋    | 265/469 [00:09<00:06, 30.60it/s, loss=0.3134]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  57%|█████▋    | 269/469 [00:09<00:06, 31.38it/s, loss=0.2006]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  59%|█████▉    | 277/469 [00:09<00:05, 33.40it/s, loss=0.2119]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  61%|██████    | 285/469 [00:09<00:05, 35.78it/s, loss=0.2956]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  62%|██████▏   | 293/469 [00:09<00:05, 32.66it/s, loss=0.2638]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  63%|██████▎   | 297/469 [00:10<00:05, 32.10it/s, loss=0.1423]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  64%|██████▍   | 301/469 [00:10<00:07, 23.60it/s, loss=0.2768]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  65%|██████▌   | 307/469 [00:10<00:06, 24.09it/s, loss=0.1779]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  67%|██████▋   | 313/469 [00:10<00:06, 25.46it/s, loss=0.2305]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  68%|██████▊   | 320/469 [00:10<00:05, 26.57it/s, loss=0.2898]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  69%|██████▉   | 323/469 [00:11<00:05, 25.91it/s, loss=0.1306]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  70%|███████   | 329/469 [00:11<00:06, 22.55it/s, loss=0.1260]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  71%|███████▏  | 335/469 [00:11<00:05, 23.54it/s, loss=0.2844]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  72%|███████▏  | 338/469 [00:11<00:05, 23.26it/s, loss=0.1816]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  73%|███████▎  | 344/469 [00:12<00:05, 24.66it/s, loss=0.1459]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  75%|███████▍  | 350/469 [00:12<00:04, 26.03it/s, loss=0.2068]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  76%|███████▌  | 356/469 [00:12<00:04, 26.71it/s, loss=0.1040]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  77%|███████▋  | 362/469 [00:12<00:03, 27.96it/s, loss=0.1941]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  78%|███████▊  | 368/469 [00:12<00:03, 25.86it/s, loss=0.1103]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  80%|███████▉  | 374/469 [00:13<00:03, 27.04it/s, loss=0.0988]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  81%|████████  | 380/469 [00:13<00:03, 27.88it/s, loss=0.1188]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  83%|████████▎ | 387/469 [00:13<00:02, 30.35it/s, loss=0.0928]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  84%|████████▍ | 395/469 [00:13<00:02, 33.02it/s, loss=0.1086]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  86%|████████▌ | 403/469 [00:14<00:01, 33.36it/s, loss=0.1308]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  87%|████████▋ | 407/469 [00:14<00:01, 32.92it/s, loss=0.0613]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  88%|████████▊ | 415/469 [00:14<00:01, 29.81it/s, loss=0.2397]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  89%|████████▉ | 419/469 [00:14<00:01, 27.32it/s, loss=0.1015]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  91%|█████████ | 425/469 [00:14<00:01, 24.70it/s, loss=0.1407]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  91%|█████████▏| 428/469 [00:15<00:01, 24.91it/s, loss=0.1747]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  92%|█████████▏| 431/469 [00:15<00:01, 24.59it/s, loss=0.0741]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  93%|█████████▎| 437/469 [00:15<00:01, 18.70it/s, loss=0.1198]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  94%|█████████▍| 440/469 [00:15<00:01, 19.51it/s, loss=0.0672]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  94%|█████████▍| 440/469 [00:15<00:01, 19.51it/s, loss=0.1123]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  94%|█████████▍| 440/469 [00:16<00:01, 19.51it/s, loss=0.0668]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  95%|█████████▍| 445/469 [00:16<00:02,  9.80it/s, loss=0.0854]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  96%|█████████▌| 449/469 [00:16<00:01, 11.95it/s, loss=0.1549]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  97%|█████████▋| 454/469 [00:17<00:00, 16.16it/s, loss=0.1428]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  97%|█████████▋| 457/469 [00:17<00:00, 18.00it/s, loss=0.1498]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 1/5:  99%|█████████▊| 463/469 [00:17<00:00, 20.58it/s, loss=0.0845]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])


Epoch 1/5:  99%|█████████▉| 466/469 [00:17<00:00, 20.05it/s, loss=0.1070]

torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


                                                                         

torch.Size([96, 32, 14, 14])
torch.Size([96, 64, 8, 8])
torch.Size([96, 128, 4, 4])
torch.Size([96, 2048])
Epoch 1/5 - Tempo: 17.78s - Training Loss: 0.3248
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])




Epoch 1/5 - Test Loss: 0.0008 - Test Accuracy: 97.66%


Epoch 2/5:   0%|          | 1/469 [00:00<00:58,  8.03it/s, loss=0.0474]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:   1%|          | 5/469 [00:00<00:27, 16.67it/s, loss=0.0813]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:   1%|▏         | 7/469 [00:00<00:28, 16.41it/s, loss=0.1303]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:   2%|▏         | 11/469 [00:00<00:42, 10.66it/s, loss=0.1751]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:   3%|▎         | 15/469 [00:01<00:32, 14.03it/s, loss=0.1236]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:   4%|▍         | 20/469 [00:01<00:26, 17.03it/s, loss=0.1726]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:   5%|▌         | 24/469 [00:01<00:26, 16.52it/s, loss=0.1993]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:   6%|▌         | 29/469 [00:01<00:21, 20.02it/s, loss=0.1836]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:   7%|▋         | 32/469 [00:02<00:20, 21.02it/s, loss=0.0575]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:   8%|▊         | 37/469 [00:02<00:23, 18.22it/s, loss=0.1032]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:   9%|▊         | 41/469 [00:02<00:24, 17.27it/s, loss=0.0539]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  10%|▉         | 46/469 [00:02<00:23, 17.96it/s, loss=0.0824]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  11%|█         | 51/469 [00:03<00:19, 21.07it/s, loss=0.1100]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  12%|█▏        | 57/469 [00:03<00:17, 23.76it/s, loss=0.0658]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  13%|█▎        | 63/469 [00:03<00:17, 22.95it/s, loss=0.1056]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  14%|█▍        | 66/469 [00:03<00:16, 23.80it/s, loss=0.3440]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  15%|█▌        | 72/469 [00:03<00:17, 22.42it/s, loss=0.1317]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  16%|█▌        | 75/469 [00:04<00:16, 23.27it/s, loss=0.0330]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  17%|█▋        | 81/469 [00:04<00:16, 23.73it/s, loss=0.0664]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  18%|█▊        | 84/469 [00:04<00:16, 23.23it/s, loss=0.1295]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  19%|█▊        | 87/469 [00:04<00:18, 20.34it/s, loss=0.0524]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  20%|█▉        | 92/469 [00:05<00:28, 13.41it/s, loss=0.1841]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])


Epoch 2/5:  20%|██        | 94/469 [00:05<00:27, 13.71it/s, loss=0.0569]

torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  21%|██▏       | 100/469 [00:05<00:22, 16.71it/s, loss=0.0432]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  22%|██▏       | 102/469 [00:05<00:22, 16.04it/s, loss=0.0833]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  23%|██▎       | 106/469 [00:06<00:21, 17.18it/s, loss=0.0839]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])


Epoch 2/5:  23%|██▎       | 108/469 [00:06<00:22, 15.83it/s, loss=0.0985]

torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  24%|██▍       | 112/469 [00:06<00:26, 13.31it/s, loss=0.0337]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  25%|██▍       | 117/469 [00:06<00:20, 16.95it/s, loss=0.1459]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  26%|██▌       | 122/469 [00:07<00:19, 18.22it/s, loss=0.0632]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  27%|██▋       | 125/469 [00:07<00:16, 20.71it/s, loss=0.1200]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  27%|██▋       | 128/469 [00:07<00:21, 15.86it/s, loss=0.1080]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  28%|██▊       | 133/469 [00:07<00:18, 18.35it/s, loss=0.1580]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  30%|██▉       | 139/469 [00:07<00:16, 20.24it/s, loss=0.1709]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  31%|███       | 145/469 [00:08<00:14, 22.85it/s, loss=0.0746]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  32%|███▏      | 148/469 [00:08<00:14, 22.68it/s, loss=0.1487]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  32%|███▏      | 151/469 [00:08<00:15, 21.08it/s, loss=0.0596]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  33%|███▎      | 157/469 [00:08<00:15, 20.10it/s, loss=0.1657]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  34%|███▍      | 160/469 [00:08<00:17, 18.06it/s, loss=0.0568]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  35%|███▌      | 165/469 [00:09<00:15, 19.69it/s, loss=0.1914]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  36%|███▌      | 170/469 [00:09<00:15, 19.47it/s, loss=0.0963]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  37%|███▋      | 175/469 [00:09<00:14, 20.25it/s, loss=0.0865]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  39%|███▊      | 181/469 [00:09<00:13, 21.27it/s, loss=0.1618]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  39%|███▉      | 184/469 [00:10<00:13, 20.38it/s, loss=0.0775]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  40%|███▉      | 187/469 [00:10<00:13, 20.38it/s, loss=0.0498]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  41%|████      | 193/469 [00:10<00:13, 20.24it/s, loss=0.0489]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  42%|████▏     | 196/469 [00:10<00:14, 18.64it/s, loss=0.0998]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  43%|████▎     | 200/469 [00:11<00:14, 18.31it/s, loss=0.1986]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  44%|████▎     | 205/469 [00:11<00:14, 18.14it/s, loss=0.0810]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  45%|████▍     | 211/469 [00:11<00:12, 20.57it/s, loss=0.1037]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  46%|████▋     | 217/469 [00:11<00:11, 22.14it/s, loss=0.1316]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  47%|████▋     | 220/469 [00:11<00:11, 22.24it/s, loss=0.0815]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  48%|████▊     | 226/469 [00:12<00:10, 22.80it/s, loss=0.1265]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  49%|████▉     | 232/469 [00:12<00:09, 23.83it/s, loss=0.0424]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  51%|█████     | 238/469 [00:12<00:10, 22.21it/s, loss=0.0962]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  51%|█████▏    | 241/469 [00:12<00:10, 22.64it/s, loss=0.1015]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  53%|█████▎    | 247/469 [00:13<00:09, 23.66it/s, loss=0.0924]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  54%|█████▍    | 253/469 [00:13<00:09, 23.25it/s, loss=0.0454]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  55%|█████▍    | 256/469 [00:13<00:08, 23.68it/s, loss=0.0460]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  56%|█████▌    | 262/469 [00:13<00:09, 22.15it/s, loss=0.0641]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  57%|█████▋    | 268/469 [00:13<00:08, 23.81it/s, loss=0.0639]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  58%|█████▊    | 274/469 [00:14<00:07, 24.80it/s, loss=0.0503]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  60%|█████▉    | 280/469 [00:14<00:07, 24.90it/s, loss=0.1314]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  60%|██████    | 283/469 [00:14<00:07, 24.77it/s, loss=0.1430]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  62%|██████▏   | 289/469 [00:14<00:07, 22.56it/s, loss=0.1465]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  62%|██████▏   | 292/469 [00:15<00:08, 21.46it/s, loss=0.0622]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  64%|██████▎   | 298/469 [00:15<00:07, 24.08it/s, loss=0.0528]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  64%|██████▍   | 301/469 [00:15<00:08, 19.81it/s, loss=0.1541]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  65%|██████▍   | 304/469 [00:15<00:09, 17.42it/s, loss=0.1111]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  65%|██████▍   | 304/469 [00:15<00:09, 17.42it/s, loss=0.0646]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  66%|██████▌   | 308/469 [00:16<00:12, 12.47it/s, loss=0.0827]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  67%|██████▋   | 313/469 [00:16<00:10, 15.40it/s, loss=0.1288]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])


Epoch 2/5:  67%|██████▋   | 315/469 [00:16<00:10, 14.48it/s, loss=0.0416]

torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  68%|██████▊   | 319/469 [00:16<00:10, 13.96it/s, loss=0.1336]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  69%|██████▉   | 324/469 [00:17<00:08, 17.36it/s, loss=0.0460]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  70%|███████   | 329/469 [00:17<00:07, 19.90it/s, loss=0.0737]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  71%|███████   | 332/469 [00:17<00:06, 21.20it/s, loss=0.1003]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  72%|███████▏  | 338/469 [00:17<00:06, 21.04it/s, loss=0.0956]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  73%|███████▎  | 341/469 [00:17<00:06, 20.61it/s, loss=0.0796]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  74%|███████▍  | 347/469 [00:18<00:05, 20.63it/s, loss=0.1225]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  75%|███████▌  | 353/469 [00:18<00:05, 22.20it/s, loss=0.2157]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  77%|███████▋  | 359/469 [00:18<00:04, 23.59it/s, loss=0.0875]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  77%|███████▋  | 362/469 [00:18<00:04, 23.23it/s, loss=0.0639]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  79%|███████▊  | 369/469 [00:19<00:03, 26.48it/s, loss=0.0905]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  80%|████████  | 376/469 [00:19<00:03, 28.82it/s, loss=0.0577]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  81%|████████▏ | 382/469 [00:19<00:03, 28.23it/s, loss=0.0638]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  82%|████████▏ | 385/469 [00:19<00:03, 27.04it/s, loss=0.0837]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  83%|████████▎ | 391/469 [00:19<00:03, 24.50it/s, loss=0.0457]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  85%|████████▍ | 397/469 [00:20<00:03, 23.38it/s, loss=0.0479]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  85%|████████▌ | 400/469 [00:20<00:02, 23.81it/s, loss=0.0673]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  87%|████████▋ | 406/469 [00:20<00:03, 20.41it/s, loss=0.0748]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  87%|████████▋ | 406/469 [00:20<00:03, 20.41it/s, loss=0.0771]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  87%|████████▋ | 409/469 [00:21<00:04, 12.09it/s, loss=0.1532]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  88%|████████▊ | 413/469 [00:21<00:04, 12.89it/s, loss=0.1054]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  88%|████████▊ | 415/469 [00:21<00:03, 13.66it/s, loss=0.0481]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  89%|████████▉ | 419/469 [00:21<00:03, 14.73it/s, loss=0.0355]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  90%|█████████ | 423/469 [00:21<00:02, 16.70it/s, loss=0.0838]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  91%|█████████▏| 428/469 [00:22<00:02, 18.32it/s, loss=0.1046]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  92%|█████████▏| 433/469 [00:22<00:01, 20.02it/s, loss=0.0380]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  93%|█████████▎| 436/469 [00:22<00:01, 20.38it/s, loss=0.0743]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  94%|█████████▍| 442/469 [00:22<00:01, 22.58it/s, loss=0.1452]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  96%|█████████▌| 448/469 [00:23<00:00, 25.41it/s, loss=0.0603]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  97%|█████████▋| 456/469 [00:23<00:00, 28.86it/s, loss=0.0699]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  99%|█████████▊| 462/469 [00:23<00:00, 27.06it/s, loss=0.1208]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 2/5:  99%|█████████▉| 466/469 [00:23<00:00, 28.34it/s, loss=0.1072]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([96, 32, 14, 14])
torch.Size([96, 64, 8, 8])
torch.Size([96, 128, 4, 4])


                                                                         

torch.Size([96, 2048])
Epoch 2/5 - Tempo: 23.74s - Training Loss: 0.0943
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
Epoch 2/5 - Test Loss: 0.0005 - Test Accuracy: 98.44%


Epoch 3/5:   0%|          | 1/469 [00:00<00:56,  8.29it/s, loss=0.0573]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:   1%|          | 5/469 [00:00<00:31, 14.51it/s, loss=0.0608]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:   2%|▏         | 11/469 [00:00<00:23, 19.27it/s, loss=0.0837]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:   3%|▎         | 14/469 [00:00<00:22, 20.61it/s, loss=0.0447]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:   4%|▍         | 20/469 [00:01<00:19, 22.76it/s, loss=0.0734]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:   6%|▌         | 26/469 [00:01<00:18, 24.52it/s, loss=0.0240]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:   6%|▌         | 29/469 [00:01<00:18, 23.55it/s, loss=0.0295]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:   7%|▋         | 35/469 [00:01<00:19, 22.09it/s, loss=0.0404]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:   9%|▊         | 41/469 [00:01<00:17, 24.59it/s, loss=0.0317]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  10%|█         | 48/469 [00:02<00:15, 27.63it/s, loss=0.1041]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  11%|█         | 52/469 [00:02<00:14, 28.79it/s, loss=0.0285]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  13%|█▎        | 59/469 [00:02<00:14, 29.17it/s, loss=0.0182]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  14%|█▍        | 66/469 [00:02<00:14, 28.61it/s, loss=0.0897]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  15%|█▌        | 72/469 [00:02<00:14, 28.20it/s, loss=0.0767]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  16%|█▌        | 75/469 [00:03<00:13, 28.49it/s, loss=0.0331]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  17%|█▋        | 81/469 [00:03<00:16, 23.32it/s, loss=0.0340]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  18%|█▊        | 84/469 [00:03<00:16, 23.49it/s, loss=0.0765]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  19%|█▉        | 90/469 [00:03<00:16, 22.52it/s, loss=0.1057]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  20%|█▉        | 93/469 [00:03<00:17, 21.54it/s, loss=0.0297]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  21%|██        | 99/469 [00:04<00:18, 20.03it/s, loss=0.0673]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  22%|██▏       | 102/469 [00:04<00:18, 20.30it/s, loss=0.0377]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  23%|██▎       | 108/469 [00:04<00:17, 20.49it/s, loss=0.0400]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  24%|██▎       | 111/469 [00:04<00:17, 20.69it/s, loss=0.0312]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  25%|██▍       | 117/469 [00:05<00:16, 21.56it/s, loss=0.0581]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  26%|██▌       | 123/469 [00:05<00:15, 22.20it/s, loss=0.0836]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  27%|██▋       | 126/469 [00:05<00:15, 22.84it/s, loss=0.0969]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  28%|██▊       | 132/469 [00:05<00:14, 23.32it/s, loss=0.0325]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  29%|██▉       | 138/469 [00:05<00:13, 24.41it/s, loss=0.0427]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  30%|███       | 142/469 [00:06<00:12, 26.68it/s, loss=0.0242]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  32%|███▏      | 150/469 [00:06<00:10, 29.23it/s, loss=0.0177]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  33%|███▎      | 157/469 [00:06<00:10, 28.91it/s, loss=0.0440]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  34%|███▍      | 161/469 [00:06<00:10, 29.80it/s, loss=0.1473]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  36%|███▌      | 167/469 [00:07<00:11, 27.38it/s, loss=0.0758]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  37%|███▋      | 174/469 [00:07<00:10, 27.47it/s, loss=0.0326]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  38%|███▊      | 180/469 [00:07<00:10, 28.49it/s, loss=0.0357]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  40%|███▉      | 186/469 [00:07<00:10, 28.04it/s, loss=0.0213]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  40%|████      | 189/469 [00:07<00:10, 27.39it/s, loss=0.0571]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  42%|████▏     | 195/469 [00:08<00:11, 23.40it/s, loss=0.0434]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  42%|████▏     | 198/469 [00:08<00:12, 21.65it/s, loss=0.0732]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  43%|████▎     | 204/469 [00:08<00:12, 21.44it/s, loss=0.1027]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  44%|████▍     | 207/469 [00:08<00:12, 20.33it/s, loss=0.0961]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  45%|████▍     | 210/469 [00:09<00:13, 19.92it/s, loss=0.0510]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  46%|████▌     | 216/469 [00:09<00:13, 18.93it/s, loss=0.0708]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  47%|████▋     | 222/469 [00:09<00:11, 22.37it/s, loss=0.0753]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  49%|████▊     | 228/469 [00:09<00:10, 23.92it/s, loss=0.0774]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  50%|████▉     | 234/469 [00:09<00:09, 25.28it/s, loss=0.0812]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  51%|█████     | 238/469 [00:10<00:08, 27.23it/s, loss=0.0488]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  52%|█████▏    | 246/469 [00:10<00:07, 29.04it/s, loss=0.0842]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  53%|█████▎    | 249/469 [00:10<00:08, 26.45it/s, loss=0.0667]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  54%|█████▍    | 255/469 [00:10<00:08, 25.03it/s, loss=0.0501]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  56%|█████▌    | 261/469 [00:10<00:08, 25.17it/s, loss=0.0480]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  57%|█████▋    | 268/469 [00:11<00:07, 27.39it/s, loss=0.0481]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  58%|█████▊    | 274/469 [00:11<00:07, 26.64it/s, loss=0.0387]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  59%|█████▉    | 277/469 [00:11<00:07, 26.04it/s, loss=0.0877]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  60%|██████    | 283/469 [00:11<00:07, 25.02it/s, loss=0.0435]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  61%|██████    | 286/469 [00:11<00:07, 24.09it/s, loss=0.1007]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  62%|██████▏   | 289/469 [00:12<00:08, 20.07it/s, loss=0.0419]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  63%|██████▎   | 294/469 [00:12<00:10, 16.77it/s, loss=0.0308]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  64%|██████▍   | 300/469 [00:12<00:08, 19.29it/s, loss=0.0428]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  65%|██████▍   | 303/469 [00:12<00:08, 19.85it/s, loss=0.0576]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  66%|██████▌   | 309/469 [00:13<00:07, 21.84it/s, loss=0.0610]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  67%|██████▋   | 315/469 [00:13<00:07, 21.46it/s, loss=0.0479]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  68%|██████▊   | 318/469 [00:13<00:07, 19.76it/s, loss=0.0431]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  68%|██████▊   | 321/469 [00:13<00:07, 19.36it/s, loss=0.0996]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  70%|██████▉   | 328/469 [00:14<00:06, 22.70it/s, loss=0.0974]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  71%|███████▏  | 335/469 [00:14<00:04, 27.22it/s, loss=0.0594]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  73%|███████▎  | 343/469 [00:14<00:04, 30.73it/s, loss=0.0430]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  75%|███████▍  | 351/469 [00:14<00:03, 33.32it/s, loss=0.0350]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  76%|███████▌  | 355/469 [00:14<00:03, 34.01it/s, loss=0.0096]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  77%|███████▋  | 363/469 [00:15<00:02, 35.54it/s, loss=0.1085]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  79%|███████▉  | 371/469 [00:15<00:02, 35.19it/s, loss=0.1318]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  81%|████████  | 379/469 [00:15<00:02, 34.31it/s, loss=0.0681]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  82%|████████▏ | 383/469 [00:15<00:02, 33.70it/s, loss=0.0505]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  83%|████████▎ | 391/469 [00:15<00:02, 29.55it/s, loss=0.0495]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  84%|████████▍ | 395/469 [00:16<00:02, 27.46it/s, loss=0.1069]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  86%|████████▌ | 401/469 [00:16<00:02, 27.25it/s, loss=0.0644]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  87%|████████▋ | 409/469 [00:16<00:02, 29.12it/s, loss=0.0695]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  88%|████████▊ | 415/469 [00:16<00:01, 29.31it/s, loss=0.0202]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  89%|████████▉ | 419/469 [00:16<00:01, 30.43it/s, loss=0.0431]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  91%|█████████ | 427/469 [00:17<00:01, 31.66it/s, loss=0.0179]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  93%|█████████▎| 435/469 [00:17<00:01, 32.53it/s, loss=0.0485]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  94%|█████████▍| 443/469 [00:17<00:00, 34.57it/s, loss=0.1183]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  96%|█████████▌| 451/469 [00:17<00:00, 35.60it/s, loss=0.0601]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 3/5:  98%|█████████▊| 460/469 [00:17<00:00, 38.23it/s, loss=0.0718]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


                                                                         

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([96, 32, 14, 14])
torch.Size([96, 64, 8, 8])
torch.Size([96, 128, 4, 4])
torch.Size([96, 2048])
Epoch 3/5 - Tempo: 18.20s - Training Loss: 0.0628
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
Epoch 3/5 - Test Loss: 0.0004 - Test Accuracy: 98.44%


Epoch 4/5:   0%|          | 0/469 [00:00<?, ?it/s, loss=0.0441]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:   0%|          | 2/469 [00:00<00:42, 10.93it/s, loss=0.0441]

torch.Size([128, 32, 14, 14])


Epoch 4/5:   1%|          | 5/469 [00:00<00:26, 17.62it/s, loss=0.0662]

torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:   3%|▎         | 15/469 [00:00<00:12, 35.57it/s, loss=0.0592]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
t

Epoch 4/5:   5%|▌         | 25/469 [00:00<00:10, 42.01it/s, loss=0.0442]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:   7%|▋         | 35/469 [00:00<00:09, 45.12it/s, loss=0.0349]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  10%|▉         | 45/469 [00:01<00:09, 46.49it/s, loss=0.0298]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  11%|█         | 50/469 [00:01<00:09, 44.33it/s, loss=0.0528]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  13%|█▎        | 60/469 [00:01<00:09, 42.06it/s, loss=0.0367]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  14%|█▍        | 65/469 [00:01<00:09, 40.98it/s, loss=0.0696]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  16%|█▌        | 75/469 [00:01<00:09, 41.36it/s, loss=0.0717]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  18%|█▊        | 85/469 [00:02<00:09, 41.54it/s, loss=0.0430]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  19%|█▉        | 90/469 [00:02<00:09, 41.45it/s, loss=0.0103]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  21%|██▏       | 100/469 [00:02<00:08, 41.99it/s, loss=0.0185]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  23%|██▎       | 110/469 [00:02<00:08, 43.08it/s, loss=0.0724]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  26%|██▌       | 120/469 [00:02<00:07, 43.79it/s, loss=0.0271]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  27%|██▋       | 125/469 [00:03<00:07, 44.10it/s, loss=0.0820]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  29%|██▉       | 135/469 [00:03<00:07, 44.94it/s, loss=0.0259]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  31%|███       | 145/469 [00:03<00:07, 45.40it/s, loss=0.0191]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  33%|███▎      | 155/469 [00:03<00:06, 46.87it/s, loss=0.0292]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
t

Epoch 4/5:  35%|███▌      | 165/469 [00:03<00:06, 47.27it/s, loss=0.0413]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  36%|███▌      | 170/469 [00:04<00:06, 47.58it/s, loss=0.0438]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  38%|███▊      | 180/469 [00:04<00:06, 44.42it/s, loss=0.0284]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  41%|████      | 190/469 [00:04<00:06, 43.62it/s, loss=0.0357]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  43%|████▎     | 200/469 [00:04<00:06, 43.87it/s, loss=0.0458]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  44%|████▎     | 205/469 [00:04<00:06, 42.65it/s, loss=0.0715]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  46%|████▌     | 215/469 [00:05<00:05, 42.37it/s, loss=0.0215]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  47%|████▋     | 220/469 [00:05<00:05, 42.31it/s, loss=0.0331]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  49%|████▉     | 229/469 [00:05<00:07, 33.02it/s, loss=0.0348]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  50%|████▉     | 233/469 [00:05<00:07, 31.32it/s, loss=0.0416]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  51%|█████▏    | 241/469 [00:05<00:07, 32.03it/s, loss=0.0644]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  53%|█████▎    | 249/469 [00:06<00:06, 31.81it/s, loss=0.0635]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  54%|█████▍    | 253/469 [00:06<00:08, 26.06it/s, loss=0.0571]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  55%|█████▍    | 256/469 [00:06<00:10, 20.82it/s, loss=0.0611]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  56%|█████▋    | 264/469 [00:06<00:07, 26.33it/s, loss=0.0430]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  58%|█████▊    | 272/469 [00:07<00:06, 31.36it/s, loss=0.0389]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  59%|█████▉    | 276/469 [00:07<00:05, 32.38it/s, loss=0.0621]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  61%|██████    | 284/469 [00:07<00:05, 34.31it/s, loss=0.0505]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  62%|██████▏   | 293/469 [00:07<00:04, 36.44it/s, loss=0.0328]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  65%|██████▍   | 303/469 [00:07<00:04, 39.91it/s, loss=0.0857]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  66%|██████▌   | 308/469 [00:08<00:03, 40.92it/s, loss=0.1122]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  68%|██████▊   | 318/469 [00:08<00:03, 41.40it/s, loss=0.0375]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  70%|██████▉   | 328/469 [00:08<00:03, 42.54it/s, loss=0.0396]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  71%|███████   | 333/469 [00:08<00:03, 41.89it/s, loss=0.0201]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  73%|███████▎  | 343/469 [00:08<00:03, 40.19it/s, loss=0.0287]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  75%|███████▌  | 353/469 [00:09<00:02, 40.19it/s, loss=0.0271]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  76%|███████▋  | 358/469 [00:09<00:02, 39.69it/s, loss=0.0623]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  78%|███████▊  | 366/469 [00:09<00:02, 35.59it/s, loss=0.0413]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  79%|███████▉  | 370/469 [00:09<00:02, 33.51it/s, loss=0.0993]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  81%|████████  | 378/469 [00:09<00:02, 31.70it/s, loss=0.0335]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  81%|████████▏ | 382/469 [00:10<00:02, 32.57it/s, loss=0.0422]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  82%|████████▏ | 386/469 [00:10<00:02, 29.12it/s, loss=0.0715]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  84%|████████▎ | 392/469 [00:10<00:03, 23.39it/s, loss=0.0216]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  85%|████████▌ | 400/469 [00:10<00:02, 28.82it/s, loss=0.0493]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  87%|████████▋ | 408/469 [00:11<00:01, 30.53it/s, loss=0.0143]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  89%|████████▊ | 416/469 [00:11<00:01, 33.41it/s, loss=0.0740]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  91%|█████████ | 425/469 [00:11<00:01, 36.05it/s, loss=0.0484]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  92%|█████████▏| 430/469 [00:11<00:01, 38.47it/s, loss=0.0828]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  94%|█████████▍| 440/469 [00:11<00:00, 41.35it/s, loss=0.0520]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  96%|█████████▌| 450/469 [00:12<00:00, 42.35it/s, loss=0.0439]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 4/5:  98%|█████████▊| 460/469 [00:12<00:00, 43.00it/s, loss=0.0982]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


                                                                         

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([96, 32, 14, 14])
torch.Size([96, 64, 8, 8])
torch.Size([96, 128, 4, 4])
torch.Size([96, 2048])
Epoch 4/5 - Tempo: 12.46s - Training Loss: 0.0488
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
Epoch 4/5 - Test Loss: 0.0003 - Test Accuracy: 98.44%


Epoch 5/5:   0%|          | 0/469 [00:00<?, ?it/s, loss=0.0288]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:   0%|          | 2/469 [00:00<00:43, 10.63it/s, loss=0.0288]

torch.Size([128, 32, 14, 14])


Epoch 5/5:   1%|          | 5/469 [00:00<00:25, 18.01it/s, loss=0.1787]

torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:   3%|▎         | 14/469 [00:00<00:14, 30.77it/s, loss=0.0446]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:   5%|▍         | 22/469 [00:00<00:14, 31.35it/s, loss=0.0051]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:   6%|▌         | 26/469 [00:00<00:14, 30.95it/s, loss=0.0135]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:   6%|▋         | 30/469 [00:01<00:14, 30.47it/s, loss=0.0421]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:   7%|▋         | 34/469 [00:01<00:17, 24.82it/s, loss=0.0137]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:   8%|▊         | 37/469 [00:01<00:23, 18.57it/s, loss=0.0063]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  10%|▉         | 45/469 [00:01<00:16, 25.10it/s, loss=0.0264]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  11%|█▏        | 53/469 [00:02<00:13, 30.43it/s, loss=0.0103]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  13%|█▎        | 61/469 [00:02<00:13, 31.35it/s, loss=0.0138]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  14%|█▍        | 65/469 [00:02<00:12, 32.59it/s, loss=0.0050]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  16%|█▌        | 74/469 [00:02<00:11, 35.77it/s, loss=0.0234]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  18%|█▊        | 84/469 [00:02<00:09, 40.14it/s, loss=0.0101]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  19%|█▉        | 89/469 [00:03<00:09, 41.08it/s, loss=0.0095]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  21%|██        | 99/469 [00:03<00:08, 43.25it/s, loss=0.0239]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  23%|██▎       | 109/469 [00:03<00:08, 43.61it/s, loss=0.0187]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  25%|██▌       | 119/469 [00:03<00:07, 44.68it/s, loss=0.0721]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  26%|██▋       | 124/469 [00:03<00:07, 44.01it/s, loss=0.0236]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  29%|██▊       | 134/469 [00:04<00:08, 40.14it/s, loss=0.0180]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  30%|██▉       | 139/469 [00:04<00:08, 40.23it/s, loss=0.0588]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  32%|███▏      | 149/469 [00:04<00:07, 40.01it/s, loss=0.1157]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  34%|███▎      | 158/469 [00:04<00:08, 37.47it/s, loss=0.0254]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  35%|███▍      | 162/469 [00:04<00:08, 35.87it/s, loss=0.0163]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  35%|███▌      | 166/469 [00:05<00:09, 33.05it/s, loss=0.0229]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  36%|███▌      | 170/469 [00:05<00:10, 27.31it/s, loss=0.0220]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  38%|███▊      | 177/469 [00:05<00:12, 22.52it/s, loss=0.0564]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  39%|███▉      | 185/469 [00:05<00:10, 28.01it/s, loss=0.0325]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  41%|████      | 193/469 [00:05<00:08, 30.95it/s, loss=0.0247]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  43%|████▎     | 201/469 [00:06<00:08, 32.92it/s, loss=0.0478]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  45%|████▍     | 209/469 [00:06<00:07, 35.63it/s, loss=0.0485]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  45%|████▌     | 213/469 [00:06<00:07, 36.09it/s, loss=0.0242]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  48%|████▊     | 223/469 [00:06<00:06, 39.41it/s, loss=0.0711]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  50%|████▉     | 233/469 [00:07<00:05, 41.56it/s, loss=0.0072]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  52%|█████▏    | 243/469 [00:07<00:05, 41.98it/s, loss=0.0296]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  53%|█████▎    | 248/469 [00:07<00:05, 41.61it/s, loss=0.0495]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  55%|█████▌    | 258/469 [00:07<00:05, 40.32it/s, loss=0.0710]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  56%|█████▌    | 263/469 [00:07<00:05, 40.07it/s, loss=0.0084]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  58%|█████▊    | 272/469 [00:08<00:05, 38.31it/s, loss=0.0051]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  60%|█████▉    | 280/469 [00:08<00:05, 35.94it/s, loss=0.0721]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  61%|██████    | 284/469 [00:08<00:05, 36.01it/s, loss=0.0317]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  62%|██████▏   | 292/469 [00:08<00:05, 34.26it/s, loss=0.0698]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  63%|██████▎   | 296/469 [00:08<00:05, 32.05it/s, loss=0.0056]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  65%|██████▍   | 303/469 [00:09<00:05, 28.81it/s, loss=0.0321]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  66%|██████▌   | 310/469 [00:09<00:05, 29.27it/s, loss=0.0103]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  68%|██████▊   | 317/469 [00:09<00:05, 29.63it/s, loss=0.0285]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  69%|██████▉   | 323/469 [00:09<00:04, 29.61it/s, loss=0.0342]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  70%|███████   | 330/469 [00:09<00:04, 29.99it/s, loss=0.0482]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])


Epoch 5/5:  71%|███████   | 333/469 [00:10<00:04, 29.97it/s, loss=0.0097]

torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  73%|███████▎  | 341/469 [00:10<00:04, 31.92it/s, loss=0.0231]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  74%|███████▍  | 349/469 [00:10<00:03, 33.54it/s, loss=0.0122]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  76%|███████▋  | 358/469 [00:10<00:03, 36.30it/s, loss=0.0117]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  77%|███████▋  | 363/469 [00:10<00:02, 38.13it/s, loss=0.1137]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  80%|███████▉  | 373/469 [00:11<00:02, 42.00it/s, loss=0.0171]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
t

Epoch 5/5:  82%|████████▏ | 383/469 [00:11<00:01, 43.66it/s, loss=0.0119]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  84%|████████▍ | 393/469 [00:11<00:01, 43.99it/s, loss=0.0182]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  85%|████████▍ | 398/469 [00:11<00:01, 43.13it/s, loss=0.0211]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  87%|████████▋ | 408/469 [00:11<00:01, 42.39it/s, loss=0.0118]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  89%|████████▉ | 418/469 [00:12<00:01, 43.20it/s, loss=0.0432]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  91%|█████████▏| 428/469 [00:12<00:00, 43.27it/s, loss=0.0091]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  92%|█████████▏| 433/469 [00:12<00:00, 40.69it/s, loss=0.0517]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  94%|█████████▍| 442/469 [00:12<00:00, 38.68it/s, loss=0.0093]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  96%|█████████▌| 450/469 [00:12<00:00, 36.41it/s, loss=0.0687]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  97%|█████████▋| 454/469 [00:13<00:00, 36.24it/s, loss=0.0777]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


Epoch 5/5:  99%|█████████▊| 462/469 [00:13<00:00, 31.77it/s, loss=0.0527]

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])


                                                                         

torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
torch.Size([96, 32, 14, 14])
torch.Size([96, 64, 8, 8])
torch.Size([96, 128, 4, 4])
torch.Size([96, 2048])
Epoch 5/5 - Tempo: 13.51s - Training Loss: 0.0353
torch.Size([128, 32, 14, 14])
torch.Size([128, 64, 8, 8])
torch.Size([128, 128, 4, 4])
torch.Size([128, 2048])
Epoch 5/5 - Test Loss: 0.0003 - Test Accuracy: 98.44%

Training Complete.


### Weights extraction

In [190]:
model = SimpleCNN(num_classes=10)
model.load_state_dict(torch.load('simple_cnn_mnist.pth', map_location=torch.device('cpu'),weights_only=True)) # Carica su CPU

model.eval() # good practice is to set model in evaluation when you want to extract weights

# --- Parameters Extraction ⛏️ and Numpy Conversion ---

# Weights container
numpy_weights = {}

# Move model on cpu
model.to('cpu')

print("⛏️ Weights and Bias Extraction ⛏️\n")

# Layer Conv1
# PyTorch weight shape: (out_channels, in_channels, kernel_height, kernel_width)
# NumPy expected: (in_channels, out_channels, kernel_width, kernel_height) -> (1, 32, 3, 3)
pyt_k1_w = model.conv1.weight.data.detach().numpy()
# Transpose: (out, in, kH, kW) -> (in, out, kW, kH)
numpy_weights['k1'] = pyt_k1_w
print("----------------------------------------")
print(pyt_k1_w[0][:1])
print("----------------------------------------")
print(numpy_weights['k1'][0][:1])
# PyTorch bias shape: (out_channels,)
numpy_weights['b_conv1'] = model.conv1.bias.data.detach().numpy() # Shape (32,)
print(f"k1: PyTorch Shape={pyt_k1_w.shape}, NumPy Shape={numpy_weights['k1'].shape}")
print(f"b_conv1: NumPy Shape={numpy_weights['b_conv1'].shape}")

# Layer Conv2
# PyTorch weight shape: (64, 32, 3, 3)
# NumPy expected: (32, 64, 3, 3)
pyt_k2_w = model.conv2.weight.data.detach().numpy()
numpy_weights['k2'] = pyt_k2_w
numpy_weights['b_conv2'] = model.conv2.bias.data.detach().numpy() # Shape (64,)
print(f"k2: PyTorch Shape={pyt_k2_w.shape}, NumPy Shape={numpy_weights['k2'].shape}")
print(f"b_conv2: NumPy Shape={numpy_weights['b_conv2'].shape}")

# Layer Conv3
# PyTorch weight shape: (128, 64, 3, 3)
# NumPy expected: (64, 128, 3, 3)
pyt_k3_w = model.conv3.weight.data.detach().numpy()
numpy_weights['k3'] = pyt_k3_w
numpy_weights['b_conv3'] = model.conv3.bias.data.detach().numpy() # Shape (128,)
print(f"k3: PyTorch Shape={pyt_k3_w.shape}, NumPy Shape={numpy_weights['k3'].shape}")
print(f"b_conv3: NumPy Shape={numpy_weights['b_conv3'].shape}")

# Layer FC1
# PyTorch weight shape: (out_features, in_features) -> (250, 2048)
# NumPy expected (per input @ W): (in_features, out_features) -> (2048, 250)
pyt_w1 = model.fc1.weight.data.detach().numpy()
numpy_weights['w1'] = pyt_w1.T # Trasponi
# PyTorch bias shape: (out_features,) -> (250,)
# NumPy expected (per aggiunta diretta): (1, out_features) -> (1, 250)
pyt_b1 = model.fc1.bias.data.detach().numpy()
numpy_weights['b1'] = pyt_b1.reshape(1, -1) # Rendi (1, 250)
print(f"w1: PyTorch Shape={pyt_w1.shape}, NumPy Shape={numpy_weights['w1'].shape}")
print(f"b1: PyTorch Shape={pyt_b1.shape}, NumPy Shape={numpy_weights['b1'].shape}")

# Layer FC2
# PyTorch weight shape: (num_classes, 250) -> (10, 250)
# NumPy expected: (250, num_classes) -> (250, 10)
pyt_w2 = model.fc2.weight.data.detach().numpy()
numpy_weights['w2'] = pyt_w2.T # Trasponi
# PyTorch bias shape: (num_classes,) -> (10,)
# NumPy expected: (1, num_classes) -> (1, 10)
pyt_b2 = model.fc2.bias.data.detach().numpy()
numpy_weights['b2'] = pyt_b2.reshape(1, -1) # Rendi (1, 10)
print(f"w2: PyTorch Shape={pyt_w2.shape}, NumPy Shape={numpy_weights['w2'].shape}")
print(f"b2: PyTorch Shape={pyt_b2.shape}, NumPy Shape={numpy_weights['b2'].shape}")

print("\nExtraction complete. Numpy weights are in the dictionary 'numpy_weights'.")

# Access Example:
np_k1 = numpy_weights['k1']
np_b_conv1 = numpy_weights['b_conv1']
np_k2 = numpy_weights['k2']
np_b_conv2 = numpy_weights['b_conv2']
np_k3 = numpy_weights['k3']
np_b_conv3 = numpy_weights['b_conv3']
np_w1 = numpy_weights['w1']
np_b1 = numpy_weights['b1']
np_w2 = numpy_weights['w2']
np_b2 = numpy_weights['b2']



# [[[[-0.06239345  0.16331542  0.28573602]
#    [ 0.299534    0.48019555  0.25194943]
#    [-0.24432278  0.3191273  -0.06802213]]]


#  [[[ 0.10294101 -0.14240074  0.01178457]
#    [ 0.3072691  -0.06823204  0.30347323]
#    [-0.06327374  0.3396498   0.07433306]]]



#    [[[[-0.06239345  0.16331542  0.28573602]
#    [ 0.299534    0.48019555  0.25194943]
#    [-0.24432278  0.3191273  -0.06802213]]

#   [[ 0.10294101 -0.14240074  0.01178457]
#    [ 0.3072691  -0.06823204  0.30347323]
#    [-0.06327374  0.3396498   0.07433306]]

Flatten(start_dim=1, end_dim=-1)
⛏️ Weights and Bias Extraction ⛏️

----------------------------------------
[[[-0.50176585 -0.3435954 ]
  [ 0.07998651 -0.02189105]]]
----------------------------------------
[[[-0.50176585 -0.3435954 ]
  [ 0.07998651 -0.02189105]]]
k1: PyTorch Shape=(32, 1, 2, 2), NumPy Shape=(32, 1, 2, 2)
b_conv1: NumPy Shape=(32,)
k2: PyTorch Shape=(64, 32, 3, 3), NumPy Shape=(64, 32, 3, 3)
b_conv2: NumPy Shape=(64,)
k3: PyTorch Shape=(128, 64, 3, 3), NumPy Shape=(128, 64, 3, 3)
b_conv3: NumPy Shape=(128,)
w1: PyTorch Shape=(250, 1152), NumPy Shape=(1152, 250)
b1: PyTorch Shape=(250,), NumPy Shape=(1, 250)
w2: PyTorch Shape=(10, 250), NumPy Shape=(250, 10)
b2: PyTorch Shape=(10,), NumPy Shape=(1, 10)

Extraction complete. Numpy weights are in the dictionary 'numpy_weights'.


## CNN - NumPy

### Padding

`np.pad()` takes as first argument the matrix to pad and as second argument a set of specification: for every dimension (in our case 4) it takes the number of paddings to add before and after the end of the dimension. If the objective is to pad only the image itself, which is found in the last two dimension, we should write:

`np.pad(img9,((0,0),(0,0),(pad,pad),(pad,pad)))` 

since dimensions are: BATCH, CHANNELS, HEIGHT, WIDTH

In [4]:
img9 = np.arange(1,37).reshape(2,2,3,3)
pad_img9 = np.pad(img9,((0,0),(0,0),(1,1),(1,1)))
print(img9)
print(pad_img9)

[[[[ 1  2  3]
   [ 4  5  6]
   [ 7  8  9]]

  [[10 11 12]
   [13 14 15]
   [16 17 18]]]


 [[[19 20 21]
   [22 23 24]
   [25 26 27]]

  [[28 29 30]
   [31 32 33]
   [34 35 36]]]]
[[[[ 0  0  0  0  0]
   [ 0  1  2  3  0]
   [ 0  4  5  6  0]
   [ 0  7  8  9  0]
   [ 0  0  0  0  0]]

  [[ 0  0  0  0  0]
   [ 0 10 11 12  0]
   [ 0 13 14 15  0]
   [ 0 16 17 18  0]
   [ 0  0  0  0  0]]]


 [[[ 0  0  0  0  0]
   [ 0 19 20 21  0]
   [ 0 22 23 24  0]
   [ 0 25 26 27  0]
   [ 0  0  0  0  0]]

  [[ 0  0  0  0  0]
   [ 0 28 29 30  0]
   [ 0 31 32 33  0]
   [ 0 34 35 36  0]
   [ 0  0  0  0  0]]]]


### Delating

`delateOne` adds one zero between each element in the matrix given in input. this is done to be able to do the backward phase with stride 1 even in the forward it was 2, by modifying the gradient of the output. Motivations will be better analyzed in the next sections

In [248]:
def delateOne(matrix):
    indix = np.arange(1,matrix.shape[3])
    matrix = np.insert(matrix,indix,0,3)
    indix = np.arange(-(matrix.shape[-2]-1),0)
    matrix = np.insert(matrix,indix,0,-2)
    return matrix

### Slow Convolution Layer: Forward

In [5]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# This is a PyTorch Convolution example to be used to check if the convolution implemented in both slow and fast approaches are correct

class CustomConv(nn.Module):
    def __init__(self, kernel: torch.Tensor, bias: torch.Tensor = None, 
                 stride=1, padding=0):
        super().__init__()
        out_ch, in_ch, k_h, k_w = kernel.shape
        self.stride = stride
        self.padding = padding
        
        self.conv = nn.Conv2d(in_channels=in_ch,
                              out_channels=out_ch,
                              kernel_size=(k_h, k_w),
                              stride=stride,
                              padding=padding,
                              bias=(bias is not None))
        with torch.no_grad():
            self.conv.weight.copy_(kernel)
            if bias is not None:
                self.conv.bias.copy_(bias)

        self.conv.weight.requires_grad_(False)
        if bias is not None:
            self.conv.bias.requires_grad_(False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return F.relu(self.conv(x))

def Slow_ReLU_Conv(img,ker,bias=np.array(0),pad=0,stride=1,applyReLU=True):
    if applyReLU: # Forward case
        out_ch, in_ch, k_width, k_height = ker.shape
        nk_channel = out_ch
    else: # Backward case
        in_ch, out_ch, k_width, k_height = ker.shape
        nk_channel = in_ch

    # bias has shape out_ch, 1, 1. It's a scalar value for each channel broadcasted to the kernel's width and height
    # number of channels taken in input by the kernel 'in_ch' 
    # must be the same as the number of channels of the image 'channels'

    img = np.pad(img,((0,0),(0,0),(pad,pad),(pad,pad)))
    n_images, channels, i_height, i_width  = img.shape
    ni_height = int(((i_height - k_height) / stride) + 1) # new image height # Padding is already added
    ni_width = int(((i_width - k_width) / stride) + 1) # new image width
    ni = np.zeros((n_images, out_ch, ni_height, ni_width)).astype(np.float32) # new image

    if in_ch != channels:
        raise ValueError(f"number of channels taken in input by the kernel ({in_ch}) must be the same as the number of channels of the image ({channels})")

    for one_img in range(n_images):
        for one_k_channel in range(nk_channel):
            for i_nih in range(ni_height): # which cycles row by row of the new image
                for i_niw in range(ni_width): # which cycles column by column of the new image
                    current_sum = 0.0 # convolution sum for the specific output cell
                    # Convolution cycles
                    for channel in range(channels): # channels == in_ch
                        for i_kh in range(k_height):
                            input_y = (i_nih * stride) + i_kh # get the y location, the height
                            for i_kw in range(k_width):
                                input_x = (i_niw * stride) + i_kw # get the x location, the width
                                # check that everything stays in the measures
                                if 0 <= input_y < i_height and 0 <= input_x < i_width:
                                    input_val = img[one_img, channel, input_y, input_x]
                                    kernel_val = ker[one_k_channel, channel, i_kh, i_kw]
                                    current_sum += (input_val * kernel_val).astype(np.float32)
                    ni[one_img, one_k_channel, i_nih, i_niw] = current_sum
    if bias.all() != 0:
        bias = bias.reshape(bias.shape[0],1,1)
        if bias.shape[0] != out_ch:
            raise ValueError(f"bias dimension ({bias.shape[0]}) doesn't match kernel's number of channels ({out_ch})")
        ni = ni + bias
    ni = ni.astype(np.float32)
    if applyReLU:
        ni = np.maximum(0, ni)
        mask = ni.copy()
        mask[mask > 0] = 1
        return ni,mask
    else:
        return ni
#-------------------------------------------- Examples --------------------------------------------------------
img = np.arange(1,3*4+1).reshape(1,1,3,4).astype(np.float32)
print("-------img-------")
print(img)
ker = np.arange(1,8+1).reshape(2,1,2,2)
print("-------ker-------")
print(ker)
bias = np.array([1,2]).reshape(2,1,1)
res,mask = Slow_ReLU_Conv(img,ker,bias,pad=1,stride=2)
print("-------Conv Slow-------")
print(res)
# print("------mask-------")
# print(mask)


my_kernel = torch.from_numpy(ker).float()

my_bias = torch.from_numpy(np.array([1,2])).float()

modelC = CustomConv(kernel=my_kernel,bias=my_bias, stride=2, padding=1)

# input di prova (batch=1, canali=1, H=5, W=5)
x = torch.from_numpy(img)
y = modelC(x)
print("-------Conv PyTorch-------")
print(y)

-------img-------
[[[[ 1.  2.  3.  4.]
   [ 5.  6.  7.  8.]
   [ 9. 10. 11. 12.]]]]
-------ker-------
[[[[1 2]
   [3 4]]]


 [[[5 6]
   [7 8]]]]
-------Conv Slow-------
[[[[  5.  19.  13.]
   [ 47.  95.  45.]]

  [[ 10.  40.  30.]
   [104. 232. 126.]]]]
-------Conv PyTorch-------
tensor([[[[  5.,  19.,  13.],
          [ 47.,  95.,  45.]],

         [[ 10.,  40.,  30.],
          [104., 232., 126.]]]])


### Slow Convolution Layer: Backward

**Actors:**
1. W is the kernel
2. $\delta$ is the gradient
3. x is the input to the convolution layer during forward
4. b is the bias

**Steps:**

- **Derive delta**

Deriving delta with respect to ReLU activation consists in the hadamard product (element-wise product) of the gradient ($\delta$) and the mask obtained at the forward step, that is, all the elements in the convolved image greater than zero are put to one, the rest is zero.
$$
\delta^{(i)} = \delta_{\text{flat reshaped}} \cdot \text{mask}
$$

- **Gradient with respect to W**:

$$
\frac{\partial L}{\partial W^{(i)}} = \text{Convolution}(x^{(i)}, \delta)
$$
This convolution creates a matrix for every channel of input image $x^{i}$ and for every channel of output image $\delta$, thus resulting in the correct number of channels

- **Gradient w.r.t. the input \( x \)** (To go to the preceding layer):

$$
\delta^{(i-1)} = \text{Full\_Convolution}(\delta^{(i)}, W^{(i)})
$$

- **Gradient w.r.t the bias**

Since the bias is added equally across the spatial dimensions of each output channel, the gradient is the sum of all elements in each output channel:

$$
\frac{\partial L}{\partial b^{(i)}_c} = \sum_{h,w} \delta^{(i)}_{c,h,w}
$$

For batched inputs, sum also across the batch dimension:

$$
\frac{\partial L}{\partial b^{(i)}_c} = \sum_{n,h,w} \delta^{(i)}_{n,c,h,w}
$$

In [282]:
def Slow_ReLU_Gradient(img,d_img,ker,mask,pad=0,stride=1):
    """
    NEW APPROACH !
    Performs the backward pass of the convolution layer. It takes the original image, 
    the gradient image, and then the kernel, padding and stride used in the convolution. Also the mask is needed to perform the ReLU operation.
    It returns the gradient w.r.t. the Original Image to back propagate and the gradient of the kernel
    """ 
    ############################################# Gradient of Input Image ####################################
    # The computation consists in a convolution where the image is the gradient of the output image delated (zeros between matrix elements) of stride-1
    # and padded of kernel-1 dimensions 
    # and the kernel 180 degrees rotation (flipped vertically and then horizontally)
    # FullConvolution(d_imgDelated, Rotated180Deg(kernel)) with stride 1
    out_ch, in_ch, k_height, k_width = ker.shape
    batch_s, in_ch, img_height, img_width = img.shape

    # backward ReLU
    d_img = np.multiply(d_img,mask)

    # Delating the gradient of output
    if stride == 2:
        d_img = delateOne(d_img)
    elif stride > 2:
        raise ValueError(f"Stride greater than 2 is not acceptable")
    d_imgPadded = np.pad(d_img,((0,0),(0,0),(k_height-1-pad,k_height-1-pad),(k_width-1-pad,k_width-1-pad)))
    batch_s, out_ch, dimg_height, dimg_width = d_img.shape
    
    # flipping the kernel
    ker180 = np.rot90(ker,2,(-2,-1))

    # Computation
    gi = np.zeros_like(img)
    current_sum = 0.0
    for bs in range(batch_s):
        for i_gih in range(img_height):
            for i_giw in range(img_width):
                for i_outch in range(out_ch):
                    for i_inch in range(in_ch):
                        for i_kh in range(k_height):
                            y = i_gih + i_kh
                            for i_kw in range(k_width):
                                x = i_gih + i_kw

                                if 0 <= y < d_imgPadded.shape[-2] and 0 <= x < d_imgPadded.shape[-1]:
                                    input_val = d_imgPadded[bs,i_outch,y,x]
                                    ker_val = ker180[i_outch,i_inch,i_kh,i_kw] 
                                else:
                                    break
                                current_sum += input_val*ker_val
                    gi[bs,i_inch,i_gih,i_giw] = current_sum
                    current_sum = 0.0

    ############################################# Gradient of Kernel ####################################
    # The computation consists in a convolution between the original image and the delated gradient of the output image in order to
    # find the kernel
    gk = np.zeros_like(ker)
    img = np.pad(img,((0,0),(0,0),(pad,pad),(pad,pad)))
    current_sum = 0.0
    for bs in range(batch_s):
        for i_gih in range(k_height):
            for i_giw in range(k_width):
                for i_inch in range(in_ch):
                    for i_outch in range(out_ch):
                        for i_kh in range(dimg_height):
                            y = i_gih + i_kh
                            for i_kw in range(dimg_width):
                                x = i_gih + i_kw
                                if 0 <= y < img_height and 0 <= x < img_width:
                                    input_val = img[bs,i_inch,y,x]
                                    ker_val = d_img[bs,i_outch,i_kh,i_kw] 
                                    current_sum += input_val*ker_val
                                else:
                                    break
                        gk[i_outch,i_inch,i_gih,i_giw] = current_sum
                        current_sum = 0.0

    ############################################# Gradient of Bias ####################################
    # The computation consists in summing the gradient of the output image together to find the bias for every channel
    gb = d_img.sum((-1,-2)) # sum over height and width
    
    ################################################### Return Results ###############################################
    return gi,gk,gb

in_ch = 1
out_ch = 2
idim = 7
kdim = 2
imAge = np.arange(1,1*in_ch*idim*idim+1).reshape(1,in_ch,idim,idim)
kerNel = np.arange(1,out_ch*in_ch*(kdim**2)+1).reshape(out_ch,in_ch,kdim,kdim)
dimAge,mask = Slow_ReLU_Conv(imAge,kerNel,stride=2,pad=1) 
dimAge = dimAge/np.mean(dimAge)
ggi,ggk,ggb = Slow_ReLU_Gradient(imAge,dimAge,kerNel,mask,stride=2,pad=1)
print(f"imAge: {imAge.shape}")
print(f"kerNel: {kerNel.shape}")
print(f"dimAge: {dimAge.shape}")
print(f"ggi: {ggi.shape}")
print(f"ggk: {ggk.shape}")

imAge: (1, 1, 7, 7)
kerNel: (2, 1, 2, 2)
dimAge: (1, 2, 4, 4)
ggi: (1, 1, 7, 7)
ggk: (2, 1, 2, 2)


In [250]:
# ################# OLD APPROACH ######################
# def Slow_ReLU_Gradient(img,d_img,ker,mask,pad=0,stride=1):
#     """
#     Performs the backward pass of the convolution layer. It takes the original image, 
#     the gradient image, and then the kernel, padding and stride used in the convolution. Also the mask is needed to perform the ReLU operation.
#     It returns the gradient w.r.t. the Original Image to back propagate and the gradient of the kernel
#     """ 

#     out_ch, in_ch, k_height, k_width  = ker.shape 
#                                             # Example #
#     # Convolving an RGB image with 32 2x2 kernels will give a shape of (32, 3, 2, 2) to the kernel. #
    
#     n_images, channels, i_height, i_width  = img.shape
#     n_images, dch, di_height, di_width  = d_img.shape
    
#     ni_height = (i_height-1)*stride-(2*pad)+k_height # new image height
#     ni_width =  (i_width-1)*stride-(2*pad)+k_width # new image width
#     height_to_pad = ni_height-i_height
#     width_to_pad = ni_width-i_width
#     d_img = np.multiply(d_img,mask)
#     d_imgP = np.pad(d_img,((0,0),(0,0),(height_to_pad,height_to_pad),(width_to_pad,width_to_pad)))
#     gi = np.zeros_like(img).astype(np.float32) # gradient of original image
#     gk = np.zeros_like(ker).astype(np.float32) # gradient of kernel

# ############################## Computing the gradient of the original image ######################################
#     current_sum = 0.0 # convolution sum for the specific output cell
#     for one_img in range(n_images):
#         for channel in range(channels):
#             for i_nih in range(i_height): # which cycles row by row of the new image
#                 for i_niw in range(i_width): # which cycles column by column of the new image
#                     # Convolution cycles
#                     for one_k_channel in range(out_ch): # channels == out_ch
#                         for i_kh in range(k_height):
#                             input_y = (i_nih * stride) + i_kh # get the y location, the height
#                             for i_kw in range(k_width):
#                                 input_x = (i_niw * stride) + i_kw # get the x location, the width
#                                 # check that everything stays in the measures
#                                 if 0 <= input_y < d_imgP.shape[2] and 0 <= input_x < d_imgP.shape[3]:
#                                     input_val = d_imgP[one_img, one_k_channel, input_y, input_x]
#                                     kernel_val = ker[one_k_channel,channel, i_kh, i_kw]
#                                     current_sum += (input_val * kernel_val).astype(np.float32)
#                     gi[one_img, channel, i_nih, i_niw] = current_sum
#                     current_sum = 0.0
    
# ############################## Computing the gradient of the kernel ##############################################
# # Need to convolve the gradient of the image with the original image, using the gradient of the image as the kernel and 
# # keeping the same stride and padding (Otherwise the kernel won't work)
#     current_sum = 0.0
#     for one_img in range(n_images):
#         for in_k_ch in range(in_ch): # which in the example is 3
#             for out_k_ch in range(out_ch): # which in the example is 32
#                 for k_gh in range(k_height):
#                     for k_gw in range(k_width):
#                     # gk[out_k_ch,in_k_ch,k_gh,k_gw] = something
#                         for i_dh in range(di_height):
#                             input_y = (k_gh * stride) + i_dh # get the y location, the height
#                             for i_dw in range(di_width):
#                                 input_x = (k_gw * stride) + i_dw
#                                 # check that everything stays in the measures
#                                 if 0 <= input_y < i_height and 0 <= input_x < i_width:
#                                     input_val = img[one_img, in_k_ch, input_y, input_x]
#                                     kernel_val = d_img[one_img, out_k_ch, i_dh, i_dw]
#                                     current_sum += (input_val * kernel_val).astype(np.float32)
#                         gk[out_k_ch, in_k_ch, k_gh, k_gw] += current_sum

# ############################## Computing the gradient of the bias ##############################################
#     gb = d_img.sum((0,-1,-2)) # sum over batch, height and width
# ################################################### Return Results ###############################################
#     return gi,gk,gb

# img = np.arange(1,17).reshape(1,1,4,4)
# ker = np.arange(1,9).reshape(2,1,2,2)
# bias = np.array([1,1])
# d_img,mask=Slow_ReLU_Conv(img,ker,bias)
# print("-------------d_img--------------")
# d_img = d_img - 2
# print(d_img)
# print(d_img.shape)
# print("--------------------------------")
# a,b,c = Slow_ReLU_Gradient(img,d_img,ker,mask)
# print(a)
# print(b)
# print(c)

### Fast Convolution Layer: Forward

In [69]:
def Fast_ReLU_Conv(batch_of_images,kernel,bias=np.array(0),pad=0,stride=1,applyReLU=True):
    kc, ac, kw, kh = kernel.shape # number of kernels, number of input channels, kernel width and kernel height
    # im2col: Window creation
    batch_of_images = np.pad(batch_of_images,((0,0),(0,0),(pad,pad),(pad,pad)))
    bs, nc, iw, ih = batch_of_images.shape # batch of images' number of images, number of channels, single image's width, single images's height
    window_m = np.lib.stride_tricks.sliding_window_view(batch_of_images,(1,nc,kw,kh))[:,:,::stride,::stride].reshape((-1,(kw*kh*nc))) # window matrix
    # Convolution
    kernel = kernel.reshape((-1,(kw*kh*nc))).transpose(1,0)
    c_m = (window_m @ kernel).astype(np.float32) # convolved image matrix
    # ReLU activation
    nih = int(((ih-kh) / stride) + 1) # new image height # Padding is already added
    niw = int(((iw-kw) / stride) + 1) # new image width
    # First operate a reshape keeping spatial ordering, which has channels at the end
    output_temp = c_m.reshape(bs, nih, niw, kc)
    # Transpose to have input in shapes (batch, output_channel, height, width)
    reshaped_correct_order = output_temp.transpose(0,3,1,2).astype(np.float32)
    if bias.any() != 0:
        reshaped_correct_order = (reshaped_correct_order + bias.reshape(1,-1,1,1))
    if applyReLU:
        reshaped_correct_order = np.maximum(0,reshaped_correct_order)
    mask = np.copy(reshaped_correct_order)
    mask[mask>0]=1
    return reshaped_correct_order,mask



img = np.arange(1,2*3*3+1).reshape(1,2,3,3).astype(np.float32)
# print("-------img-------")
# print(img)
ker = np.arange(1,16+1).reshape(2,2,2,2)
# print("-------ker-------")
# print(ker)
bias = np.array([1,2]).reshape(2,1,1)
res,mask = Slow_ReLU_Conv(img,ker,bias,pad=0,stride=1)
print("-------Conv Slow-------")
print(res)
X_c,mask = Fast_ReLU_Conv(img,ker,bias,pad = 1,stride=1)
print("-------Conv Fast-------")
print(X_c)
res,mask = Slow_ReLU_Conv(res,ker,bias,pad=0,stride=1)
print("-------Conv Slow-------")
print(res)
X_c,mask = Fast_ReLU_Conv(X_c,ker,bias,pad = 0,stride=1)
print("-------Conv Fast-------")
print(X_c)

-------Conv Slow-------
[[[[ 357.  393.]
   [ 465.  501.]]

  [[ 838.  938.]
   [1138. 1238.]]]]
-------Conv Fast-------
[[[[  85.  170.  192.   94.]
   [ 183.  357.  393.  187.]
   [ 243.  465.  501.  235.]
   [ 111.  206.  220.  100.]]

  [[ 174.  363.  417.  215.]
   [ 408.  838.  938.  476.]
   [ 564. 1138. 1238.  620.]
   [ 296.  591.  637.  317.]]]]
-------Conv Slow-------
[[[[32231.]]

  [[79176.]]]]
-------Conv Fast-------
[[[[15011. 20885. 16057.]
   [23607. 32231. 24383.]
   [18779. 25317. 18937.]]

  [[35636. 50230. 39354.]
   [57176. 79176. 61088.]
   [47692. 65286. 49882.]]]]


### Fast Convolution Layer: Backward

In [70]:
def Fast_ReLU_Gradient(batch_of_images,d_image,kernel,mask,pad=0,stride=1):
    out_ch, in_ch, kh, kw = kernel.shape # number of kernels, number of input channels, kernel width and kernel height
    bs, nc, i_height,i_width = batch_of_images.shape # batch of images' number of images, number of channels, single image's width, single images's height

    batchSize, out_ch, dh, dw = d_image.shape # number of kernels, number of input channels, kernel width and kernel height
    ni_height = int(((i_height-1)*stride)+kh) # new image height
    ni_width =  int(((i_width-1)*stride)+kw) # new image width
    height_to_pad = (ni_height-dh)
    width_to_pad = (ni_width-dw)

    half_htp = height_to_pad//2
    half_wtp = width_to_pad//2

    d_image = np.multiply(d_image,mask)
    d_imgP = np.pad(d_image,((0,0),(0,0),(half_htp,half_htp),(half_wtp,half_wtp)))

    batch_of_images = np.pad(batch_of_images,((0,0),(0,0),(pad,pad),(pad,pad)))
    bs, nc, iw, ih = batch_of_images.shape # batch of images' number of images, number of channels, single image's width, single images's height
    
    ############################## Computing the gradient of the bias ##############################################
    gb = d_image.sum((0,-1,-2)) # sum over batch, height and width

    ########################################## Gradient of Kernel ###################################################
    window_boi = np.lib.stride_tricks.sliding_window_view(batch_of_images,(1,1,dh,dw))[:,:,::stride,::stride].reshape((-1,(dw*dh*1))) # window matrix
    d_image = d_image.reshape((-1,(dw*dh*1))).transpose(1,0)
    gk = (window_boi @ d_image).transpose(1,0).reshape(out_ch, in_ch, kh, kw,).astype(np.float32) # convolved image matrix

    ########################################## Gradient of Image ###################################################
    gi,_ = Fast_ReLU_Conv(d_imgP,kernel.transpose(1,0,2,3),stride = stride,pad=pad,applyReLU=False)
    # window_dboi = np.lib.stride_tricks.sliding_window_view(d_imgP,(1,out_ch,kh,kw))[:,:,::stride,::stride].reshape((-1,(kw*kh*out_ch))) # window matrix
    # kernel = kernel.reshape((-1,(kw*kh*out_ch))).transpose(1,0)
    # gi = (window_dboi @ kernel).reshape(bs, i_height, i_width, nc).transpose(0,3,1,2).astype(np.float32)

    ################################################### Return Results ###############################################
    return gi,gk,gb

s = 2
p = 1
in_ch = 3
out_ch = 32
i_dim = 28
k_dim = 3
img = np.arange(1,i_dim*in_ch*i_dim+1).reshape(1,in_ch,i_dim,i_dim)
ker = np.arange(1,out_ch*k_dim*in_ch*k_dim+1).reshape(out_ch,in_ch,k_dim,k_dim)
bias = np.ones(out_ch)
d_img,mask = Fast_ReLU_Conv(img,ker,bias,stride=s,pad=p)

print("-------------img--------------")
#print(img)
print(img.shape)
print("-------------ker--------------")
#print(ker)
print(ker.shape)
print("################################")
print("-------------d_img--------------")
#print(d_img-2)
print(d_img.shape)

print("************************************")
a,b,c = Fast_ReLU_Gradient(img,d_img-2,ker,mask,stride=s,pad=p)
print("-------------gi-----------------")
print(a.shape)
print("--------------gk-----------------")
print(b.shape)
print("--------------gb-----------------")
print(c.shape)

####################### Expected result ##########################
# [[[[ 1  2  3  4]
#    [ 5  6  7  8]
#    [ 9 10 11 12]
#    [13 14 15 16]]]]
# -------------d_img--------------
# [[[[ 43.  53.  63.]
#    [ 83.  93. 103.]
#    [123. 133. 143.]]

#   [[ 99. 125. 151.]
#    [203. 229. 255.]
#    [307. 333. 359.]]]]
# (1, 2, 3, 3)
# --------------------------------
# [[[[ 964. 2034. 2494. 1246.]
#    [2636. 5268. 6044. 2912.]
#    [4332. 8372. 9148. 4320.]
#    [2088. 3922. 4238. 1938.]]]]
# [[[[ 6042.  6879.]
#    [ 9390. 10227.]]]


#  [[[15018. 17079.]
#    [23262. 25323.]]]]
# [ 837. 2061.]
##################################################

-------------img--------------
(1, 3, 28, 28)
-------------ker--------------
(32, 3, 3, 3)
################################
-------------d_img--------------
(1, 32, 14, 14)
************************************


ValueError: cannot reshape array of size 7776 into shape (32,3,3,3)

In [77]:
7776/32/3/9/9

1.0

### MLP Layer: Forward

In [10]:
def softmax(x):
    e_x = np.exp(x - np.max(x,axis=-1,keepdims=True))  # for numerical stability
    return e_x / np.sum(e_x,axis=-1,keepdims=True)

def ReLU_SoftMax_FullyConnected(input_array,w1,b1,w2,b2):
    fl = (input_array @ w1)+b1 # first layer
    fa = np.maximum(0,fl) # first activation: ReLU
    sl = (fa @ w2)+b2 # second layer
    sa = softmax(sl) # second activation: SoftMax
    return fl,fa,sl,sa

#print(softmax([1,2,3,100000]))
#print(softmax_no_NS([1,2,3,1000]))
#r = np.array(np.array([1,2,777,2]))
#print(softmax(r))
#r = np.array((np.array([1,2,777,2]),np.array([1,2,777,2]),np.array([1,2,777,2])))
#print(softmax(r))

### MLP Layer: Backward

In [11]:
def ReLU_SoftMax_FC_Backward(bs,pred,labels,w1,w2,fa,fl,i_mlp):
    dL_dz2 = pred-labels[0:bs]
    dL_dw2 = fa.T @ dL_dz2
    dL_db2 = np.sum(dL_dz2, axis=0)
    dL_dfa = dL_dz2 @ w2.T
    dReLU = (fl > 0).astype(float)
    dL_dfl = dL_dfa * dReLU
    dL_dw1 = i_mlp.reshape(bs, -1).T @ dL_dfl
    dL_db1 = np.sum(dL_dfl, axis=0)
    dL_i_mlp = dL_dfl @ w1.T
    return dL_i_mlp,dL_dw1,dL_db1,dL_dw2,dL_db2

### Loss Function: Categorical Cross-Entropy

In [12]:
def crossEntropy(p,t):
    # p stands for prediction and t stands for true label
    # p = [0,0,1] and t = [1,0,0]
    p = p+(1/100000) # for numerical stability
    return -np.dot(t,np.log(p).T)

#c = [1,1000000000000000,1,1]
#c = softmax(c)
#print(c)
#c = crossEntropy(c,[0,1,0,0])
#print(c)

## Inference

In this section the three implementations will be compared in terms of time. Recall that all the predictions should be the same since the weights are the same.

In [191]:
import time
from tqdm import tqdm

np_k1 = numpy_weights['k1'].astype(np.float32)
np_b_conv1 = numpy_weights['b_conv1'].astype(np.float32)
np_k2 = numpy_weights['k2'].astype(np.float32)
np_b_conv2 = numpy_weights['b_conv2'].astype(np.float32)
np_k3 = numpy_weights['k3'].astype(np.float32)
np_b_conv3 = numpy_weights['b_conv3'].astype(np.float32)
np_w1 = numpy_weights['w1'].astype(np.float32)
np_b1 = numpy_weights['b1'].astype(np.float32)
np_w2 = numpy_weights['w2'].astype(np.float32)
np_b2 = numpy_weights['b2'].astype(np.float32)

dict_times={}
dict_times["ctorch"]=[]
dict_times["cslow"]=[]
dict_times["cfast"]=[]

dict_pred={}
dict_pred["ctorch"]=[]
dict_pred["cslow"]=[]
dict_pred["cfast"]=[]

#length = test_labels.shape[0]
length = 100
correct = 0
skip = True
loop = tqdm(range(length),desc=" Inferring...")
for i in loop:
    c0 = test_images[i].reshape(1,1,28,28).astype(np.float32)
    torch_c0 = torch.from_numpy(c0).float()
    ############### CNN PyTorch Implementation ##################
    start_time = time.time()
    outputs = model(torch_c0)
    end_time = time.time()
    _, predicted1 = torch.max(outputs.data, 1)
    dict_times["ctorch"].append(end_time-start_time)
    dict_pred["ctorch"].append(np.array(predicted1))
    ############### CNN Slow Implementation #####################
    start_time = time.time()
    c1s,mask1s = Slow_ReLU_Conv(c0.astype(np.float32),np_k1,np_b_conv1,pad=1,stride=2)
    c2s,mask2s = Slow_ReLU_Conv(c1s.astype(np.float32),np_k2,np_b_conv2,pad=0,stride=2)
    c3s,mask3s = Slow_ReLU_Conv(c2s.astype(np.float32),np_k3,np_b_conv3,pad=0,stride=2)
    imlps = c3s.reshape(1,-1)
    _,_,_,res = ReLU_SoftMax_FullyConnected(imlps,np_w1,np_b1,np_w2,np_b2)
    predicted2 = np.argmax(res,1)
    end_time = time.time()
    dict_times["cslow"].append(end_time-start_time)
    dict_pred["cslow"].append(np.array(predicted2))
    ############### CNN Fast Implementation #####################
    start_time = time.time()
    c1f,mask1f = Fast_ReLU_Conv(c0.astype(np.float32),np_k1,np_b_conv1,pad=1,stride=2)
    c2f,mask2f = Fast_ReLU_Conv(c1f.astype(np.float32),np_k2,np_b_conv2,pad=0,stride=2)
    c3f,mask3f = Fast_ReLU_Conv(c2f.astype(np.float32),np_k3,np_b_conv3,pad=0,stride=2)
    imlpf = c3f.reshape(1,-1)
    _,_,_,res = ReLU_SoftMax_FullyConnected(imlpf,np_w1,np_b1,np_w2,np_b2)
    predicted3 = np.argmax(res,1)
    end_time = time.time()
    dict_times["cfast"].append(end_time-start_time)
    dict_pred["cfast"].append(np.array(predicted3))
    #####################################################################################
    #### Check that outputs of Slow Approach and Fast Approach have the same results ###
    t = int(predicted1[0])
    s = int(predicted2[0])
    f = int(predicted3[0])
    if t == s and t == f:
        correct+=1
    #####################################################################################
    ### Keep track of the times #########################################################
    tat = round(sum(dict_times['ctorch'])/(i+1),4)
    sat = round(sum(dict_times['cslow'])/(i+1),4)
    fat = round(sum(dict_times['cfast'])/(i+1),4)
    loop.set_postfix(average_times =f"t: {tat} s, s: {sat} s, f: {fat} s" , correct_predictions=f"{100*correct/(i+1)}%")
tat = round(sum(dict_times['ctorch'])/length,4)
sat = round(sum(dict_times['cslow'])/length,4)
fat = round(sum(dict_times['cfast'])/length,4)
print(f"Average forward execution time in seconds: \nPyTorch: {tat} s, \nSlow: {sat} s, \nFast: {fat} s")

 Inferring...: 100%|██████████| 100/100 [06:59<00:00,  4.20s/it, average_times=t: 0.0023 s, s: 4.1928 s, f: 0.0017 s, correct_predictions=100.0%]

Average forward execution time in seconds: 
PyTorch: 0.0023 s, 
Slow: 4.1928 s, 
Fast: 0.0017 s





## Training

### Test for Slow approach

In this panel the approach is tested to see if it learns or not. the test uses first just one image, then the first 100 for each eopch, in order to see if the loss descends during the training

#### Weights Initialization

In [251]:
k1 = np.random.rand(int(numpy_weights['k1'].flatten().shape[0])).reshape(numpy_weights['k1'].shape)
bc1 = np.random.rand(int(numpy_weights['b_conv1'].flatten().shape[0])).reshape(numpy_weights['b_conv1'].shape)
k2 = np.random.rand(int(numpy_weights['k2'].flatten().shape[0])).reshape(numpy_weights['k2'].shape)
bc2 = np.random.rand(int(numpy_weights['b_conv2'].flatten().shape[0])).reshape(numpy_weights['b_conv2'].shape)
k3 = np.random.rand(int(numpy_weights['k3'].flatten().shape[0])).reshape(numpy_weights['k3'].shape)
bc3 = np.random.rand(int(numpy_weights['b_conv3'].flatten().shape[0])).reshape(numpy_weights['b_conv3'].shape)
w1 = np.random.rand(int(numpy_weights['w1'].flatten().shape[0])).reshape(numpy_weights['w1'].shape)
b1 = np.random.rand(int(numpy_weights['b1'].flatten().shape[0])).reshape(numpy_weights['b1'].shape)
w2 = np.random.rand(int(numpy_weights['w2'].flatten().shape[0])).reshape(numpy_weights['w2'].shape)
b2 = np.random.rand(int(numpy_weights['b2'].flatten().shape[0])).reshape(numpy_weights['b2'].shape)

In [252]:
def avgList(listA):
    sum_li = sum(listA)
    length_li = len(listA)
    return round(sum_li/length_li,4)

#### Same Image

In [None]:
import matplotlib.pyplot as plt
ToBeTrained = True
if ToBeTrained:
    avg_loss = []
    forward_time = []
    backward_time = []
    numEpochs = 20
    bs = 1
    lr = 0.001
    loop = tqdm(range(numEpochs))
    for i in loop:
        c0 = train_images[0].reshape(1,1,28,28).astype(np.float32)
        
        # Forward
        sfts = time.time() # slow forward time start
        c1s,mask1s = Slow_ReLU_Conv(c0.astype(np.float32),k1,bc1,pad=0,stride=2)
        c2s,mask2s = Slow_ReLU_Conv(c1s.astype(np.float32),k2,bc2,pad=1,stride=2)
        c3s,mask3s = Slow_ReLU_Conv(c2s.astype(np.float32),k3,bc3,pad=0,stride=2)
        print(c0.shape)
        print(c1s.shape)
        print(c2s.shape)
        print(c3s.shape)
        imlps = c3s.reshape(1,-1)
        fl,fa,sl,sa = ReLU_SoftMax_FullyConnected(imlps,w1,b1,w2,b2)
        sfte = time.time() # slow forward time end
        sft = sfte - sfts
        forward_time.append(sft)
        
        # Loss
        loss = crossEntropy(sa,train_labels[0])
        avg_loss.append(loss)

        # Backward
        sbts = time.time() # slow backward time start
        dL_i_mlp,dL_dw1,dL_db1,dL_dw2,dL_db2 = ReLU_SoftMax_FC_Backward(bs,sa,train_labels[0],w1,w2,fa,fl,imlps)
        dL_i_mlp = dL_i_mlp.reshape(c3s.shape)

        gi3,gk3,gb3 = Slow_ReLU_Gradient(c2s,dL_i_mlp,k3,mask3s,pad=0,stride=2)

        gi2,gk2,gb2 = Slow_ReLU_Gradient(c1s,gi3,k2,mask2s,pad=1,stride=2)
        gi1,gk1,gb1 = Slow_ReLU_Gradient(c0,gi2,k1,mask1s,pad=0,stride=2)
        sbte = time.time() # slow backward time end
        sbt = sbte - sbts
        backward_time.append(sbt)

        # Weights update
        w1 -= lr*dL_dw1
        b1 -= lr*dL_db1
        w2 -= lr*dL_dw2
        b2 -= lr*dL_db2
        k3 -= lr*gk3
        k2 -= lr*gk2
        k1 -= lr*gk1
        bc3 -= lr*gb3.reshape(-1)
        bc2 -= lr*gb2.reshape(-1)
        bc1 -= lr*gb1.reshape(-1)
        
        if len(avg_loss) >= 2:
            loop.set_postfix(pendence=f" {avg_loss[i]-avg_loss[i-1]}",avgForward=f"{avgList(forward_time)} s", avgBackward=f"{avgList(backward_time)} s" )

    plt.plot(avg_loss)
    plt.show()
# 2.64135 <-> 2.64095
# 2.64055 <-> 2.64020
# 2.64015 <-> 2.63980
# 2.63910 <-> 2.63840

  0%|          | 0/20 [00:02<?, ?it/s]

(1, 1, 28, 28)
(1, 32, 14, 14)
(1, 64, 6, 6)
(1, 128, 2, 2)





ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 1152 is different from 512)

These are the results for 20 epochs of one image:
- average forward time : 4.9017 s
- average backward time : 22.5251 s

Plot of the loss:

<img src="IMAGES\Slow Approach.png">


### Test for Fast approach

In this panel the approach is tested to see if it learns or not. the test uses first just one image, then the first 100 for each eopch, in order to see if the loss descends during the training

#### Weights Initialization

In [17]:
k1 = np.random.rand(int(numpy_weights['k1'].flatten().shape[0])).reshape(numpy_weights['k1'].shape)
bc1 = np.random.rand(int(numpy_weights['b_conv1'].flatten().shape[0])).reshape(numpy_weights['b_conv1'].shape)
k2 = np.random.rand(int(numpy_weights['k2'].flatten().shape[0])).reshape(numpy_weights['k2'].shape)
bc2 = np.random.rand(int(numpy_weights['b_conv2'].flatten().shape[0])).reshape(numpy_weights['b_conv2'].shape)
k3 = np.random.rand(int(numpy_weights['k3'].flatten().shape[0])).reshape(numpy_weights['k3'].shape)
bc3 = np.random.rand(int(numpy_weights['b_conv3'].flatten().shape[0])).reshape(numpy_weights['b_conv3'].shape)
w1 = np.random.rand(int(numpy_weights['w1'].flatten().shape[0])).reshape(numpy_weights['w1'].shape)
b1 = np.random.rand(int(numpy_weights['b1'].flatten().shape[0])).reshape(numpy_weights['b1'].shape)
w2 = np.random.rand(int(numpy_weights['w2'].flatten().shape[0])).reshape(numpy_weights['w2'].shape)
b2 = np.random.rand(int(numpy_weights['b2'].flatten().shape[0])).reshape(numpy_weights['b2'].shape)

In [18]:
def avgList(listA):
    sum_li = sum(listA)
    length_li = len(listA)
    return round(sum_li/length_li,4)

#### Same Image

In [284]:
import matplotlib.pyplot as plt
avg_loss = []
forward_time = []
backward_time = []
numEpochs = 20
bs = 1
lr = 0.001
loop = tqdm(range(numEpochs))
for i in loop:
    c0 = train_images[0].reshape(1,1,28,28).astype(np.float32)
    
    # Forward
    sfts = time.time() # slow forward time start
    c1s,mask1s = Fast_ReLU_Conv(c0.astype(np.float32),k1,bc1,pad=1,stride=2)
    c2s,mask2s = Fast_ReLU_Conv(c1s.astype(np.float32),k2,bc2,pad=1,stride=2)
    c3s,mask3s = Fast_ReLU_Conv(c2s.astype(np.float32),k3,bc3,pad=1,stride=2)
    imlps = c3s.reshape(1,-1)
    fl,fa,sl,sa = ReLU_SoftMax_FullyConnected(imlps,w1,b1,w2,b2)
    sfte = time.time() # slow forward time end
    sft = sfte - sfts
    forward_time.append(sft)
    
    # Loss
    loss = crossEntropy(sa,train_labels[0])
    avg_loss.append(loss)

    # Backward
    sbts = time.time() # slow backward time start
    dL_i_mlp,dL_dw1,dL_db1,dL_dw2,dL_db2 = ReLU_SoftMax_FC_Backward(bs,sa,train_labels[0],w1,w2,fa,fl,imlps)
    dL_i_mlp = dL_i_mlp.reshape(c3s.shape)

    gi3,gk3,gb3 = Fast_ReLU_Gradient(c2s,dL_i_mlp,k3,mask3s,pad=1,stride=2)
    print(c3s.shape)
    print(gi3.shape)
    print(c2s.shape)
    print(gk3.shape)
    print(gb3.shape)
    print(bc3.shape)
    gi2,gk2,gb2 = Fast_ReLU_Gradient(c1s,gi3,k2,mask2s,pad=1,stride=2)
    gi1,gk1,gb1 = Fast_ReLU_Gradient(c0,gi2,k1,mask1s,pad=1,stride=2)
    sbte = time.time() # slow backward time end
    sbt = sbte - sbts
    backward_time.append(sbt)

    # Weights update
    w1 -= lr*dL_dw1
    b1 -= lr*dL_db1
    w2 -= lr*dL_dw2
    b2 -= lr*dL_db2
    k3 -= lr*gk3
    k2 -= lr*gk2
    k1 -= lr*gk1
    bc3 -= lr*gb3
    bc2 -= lr*gb2
    bc1 -= lr*gb1
    
    if len(avg_loss) > 2:
        loop.set_postfix(pendence=f" {avg_loss[i]-avg_loss[i-1]}",avgForward=f"{avgList(forward_time)} s", avgBackward=f"{avgList(backward_time)} s" )

plt.plot(avg_loss)
plt.show()

  0%|          | 0/20 [00:00<?, ?it/s]


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 1152 is different from 2048)

In [None]:
51200/32/64/5/5
800*64

51200