In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
%matplotlib inline
from torchvision.models import resnet18, ResNet18_Weights

# Set the device
device = "mps" if torch.backends.mps.is_available() else "cpu"
print(f"Using device: {device}")


Using device: mps


In [2]:
#!pip install tensorboard
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter("./logs")

In [7]:
#CIFAR-10 dataset
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
batch_size = 32
# Downloading/Loading CIFAR10 data
trainset = torchvision.datasets.CIFAR10(root='./CIFAR10', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./CIFAR10', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size*4, shuffle=False, num_workers=2)
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [4]:
from tqdm import tqdm
import torch.optim as optim
import time

In [13]:
#Create model ResNet19
#Add fc layer to pretrained ResNet18(18 + 1)

In [14]:
target = len(classes)

resnet19 = resnet18(weights=ResNet18_Weights.DEFAULT)

num_ftrs = resnet19.fc.in_features
resnet19.fc = nn.Sequential(nn.Linear(num_ftrs, num_ftrs//2),nn.ReLU(), nn.Dropout(p=0.2), nn.Linear(num_ftrs//2, target))

print(resnet19)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [6]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(resnet19.parameters(), lr = 0.001, momentum = 0.9)
len(trainloader)
resnet19.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
# Number of training epochs
num_epochs = 1
# Training loop

for epoch in range(num_epochs):
    resnet19.train()
    loop = tqdm(trainloader)
    eval_loop = tqdm(testloader)
    running_loss = 0.0
    start_time = time.time()

    for i, (inputs, labels) in enumerate(loop, 1):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        # Forward pass
        outputs = resnet19(inputs)
        loss = criterion(outputs, labels)

        # Backpropagation and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        loop.set_postfix(loss = loss.item())
    end_time = time.time()
    print(f"Epoch {epoch + 1} took {end_time - start_time:.2f} seconds")
    print(f"[{epoch + 1}, {i + 1:5d}] loss: {running_loss / i:.3f}")
    
    #eval step
    correct = 0
    total = 0
    resnet19.eval()
    # Disable gradient computation for evaluation
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(eval_loop, 1):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = resnet19(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Accuracy on the test set: {100 * correct / total:.2f}%")


print("Finished Training")


In [8]:
#Create model ResNet19 2
#Add conv layer to pretrained ResNet18

In [5]:
resnet19_conv = resnet18(weights=ResNet18_Weights.DEFAULT)

additional_conv_layer = nn.Sequential(
    nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=False),
    nn.BatchNorm2d(512),
    nn.ReLU(inplace=True))
resnet19_conv.layer4.add_module("additional_conv_layer", additional_conv_layer)

num_ftrs = resnet19_conv.fc.in_features
resnet19_conv.fc = nn.Linear(num_ftrs, target)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(resnet19_conv.parameters(), lr = 0.001, momentum = 0.9)

resnet19_conv.to(device)



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [10]:
# Number of training epochs
num_epochs = 1
# Training loop

for epoch in range(num_epochs):
    resnet19_conv.train()
    loop = tqdm(trainloader)
    eval_loop = tqdm(testloader)
    running_loss = 0.0
    start_time = time.time()

    for i, (inputs, labels) in enumerate(loop, 1):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        # Forward pass
        outputs = resnet19_conv(inputs)
        loss = criterion(outputs, labels)

        # Backpropagation and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        loop.set_postfix(loss = loss.item())
    end_time = time.time()
    print(f"Epoch {epoch + 1} took {end_time - start_time:.2f} seconds")
    print(f"[{epoch + 1}, {i + 1:5d}] loss: {running_loss / i:.3f}")
    
    #eval step
    correct = 0
    total = 0
    resnet19_conv.eval()
    # Disable gradient computation for evaluation
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(eval_loop, 1):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = resnet19_conv(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Accuracy on the test set: {100 * correct / total:.2f}%")


print("Finished Training")

  0%|                                                   | 0/12500 [00:00<?, ?it/s]
100%|████████████████████████████| 12500/12500 [06:37<00:00, 31.44it/s, loss=2.01][A

Epoch 1 took 397.60 seconds
[1, 12501] loss: 2.100




  0%|                                       | 1/2500 [06:40<277:44:54, 400.12s/it][A
  1%|▏                                       | 13/2500 [06:40<15:14:27, 22.06s/it][A
  1%|▍                                        | 26/2500 [06:40<6:09:38,  8.96s/it][A
  2%|▋                                        | 39/2500 [06:40<3:19:04,  4.85s/it][A
  2%|▊                                        | 52/2500 [06:40<1:59:44,  2.93s/it][A
  3%|█                                        | 65/2500 [06:40<1:16:11,  1.88s/it][A
  3%|█▎                                         | 78/2500 [06:40<50:05,  1.24s/it][A
  4%|█▌                                         | 91/2500 [06:40<33:37,  1.19it/s][A
  4%|█▋                                        | 104/2500 [06:40<22:54,  1.74it/s][A
  5%|█▉                                        | 117/2500 [06:41<15:45,  2.52it/s][A
  5%|██▏                                       | 130/2500 [06:41<10:55,  3.61it/s][A
  6%|██▍                                       | 143

Accuracy on the test set: 41.02%
Finished Training





In [None]:
#Test 3 structure like https://arxiv.org/pdf/2011.05280.pdf

In [31]:
class BasicBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock, self).__init__()

        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0),
                nn.BatchNorm2d(out_channels)
            )
        self.relu_out = nn.ReLU()

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)

        out += self.shortcut(x)
        out = self.relu_out(out)
        return out

class ResNet19_SNN(nn.Module):
    def __init__(self):
        super(ResNet19_SNN, self).__init__()

        self.conv1 = nn.Conv2d(3, 128, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(128)
        self.relu = nn.ReLU()

        self.block1 = nn.Sequential(
            BasicBlock(128, 128, stride=1),
            BasicBlock(128, 128, stride=1),
            BasicBlock(128, 128, stride=1)
        )

        self.block2 = nn.Sequential(
            BasicBlock(128, 256, stride=2),
            BasicBlock(256, 256, stride=1),
            BasicBlock(256, 256, stride=1),
        )

        self.block3 = nn.Sequential(
            BasicBlock(256, 512, stride=2),
            BasicBlock(512, 512, stride=1)
        )

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc1 = nn.Linear(512, 256)
        self.relu2 = nn.ReLU()
        self.fc2 = nn.Linear(256, 10)
        

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)

        x = self.avgpool(x)
        #print("dopo pool", x.size())
        x = x.view(x.size(0), -1)
        #print("wiew pool", x.size())
        x = self.fc1(x)
        x = self.relu2(x)
        x = self.fc2(x)
        
        return x
ResNet19_SNN_model = ResNet19_SNN()
ResNet19_SNN_model.to(device)

ResNet19_SNN(
  (conv1): Conv2d(3, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
  (block1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
      (relu_out): ReLU()
    )
    (1): BasicBlock(
      (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): Batch

In [32]:
import torch.nn.init as init

def initialize_weights(m):
    if isinstance(m, nn.Conv2d):
        init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
            init.constant_(m.bias, 0)
    elif isinstance(m, nn.BatchNorm2d):
        init.constant_(m.weight, 1)
        init.constant_(m.bias, 0)
    elif isinstance(m, nn.Linear):
        init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        init.constant_(m.bias, 0)

In [36]:
import torch.optim.lr_scheduler as ls


#ResNet19_SNN_model.apply(initialize_weights)
criterion = nn.CrossEntropyLoss()
#optimizer = optim.Adam(ResNet19_SNN_model.parameters(), lr = 0.01)
optimizer = optim.SGD(ResNet19_SNN_model.parameters(), lr = 0.01, weight_decay = 0.01)
#adam / dro
scheduler = ls.StepLR(optimizer, step_size=30, gamma=0.3)
#https://medium.com/analytics-vidhya/deep-learning-basics-weight-decay-3c68eb4344e9

# Number of training epochs
num_epochs = 50
# Training loop

for epoch in range(num_epochs):
    ResNet19_SNN_model.train()
    loop = tqdm(trainloader)
    eval_loop = tqdm(testloader)
    running_loss = 0.0
    start_time = time.time()
    correct = 0
    total = 0
    
    for i, (inputs, labels) in enumerate(loop, 1):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        # Forward pass
        outputs = ResNet19_SNN_model(inputs)
        loss = criterion(outputs, labels)

        # Backpropagation and optimization
        loss.backward()
        optimizer.step()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        running_loss += loss.item()
        loop.set_postfix(loss = loss.item())
    end_time = time.time()
    scheduler.step()
    print(f"Epoch {epoch + 1} took {end_time - start_time:.2f} seconds")
    print(f"[{epoch + 1}, {i + 1:5d}] loss: {running_loss / i:.3f}")
    print(f"lr: {scheduler.get_last_lr()[0]}")
    print(f"Accuracy on the test set: {100 * correct / total:.2f}%")
    writer.add_scalar("LOSS/Training",loss.item(), epoch)
    writer.add_scalar("LR/Training",scheduler.get_last_lr()[0], epoch)
    writer.add_scalar("ACCURACY/Training", correct / total, epoch)
    
    #eval step
    correct = 0
    total = 0
    ResNet19_SNN_model.eval()
    # Disable gradient computation for evaluation
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(eval_loop, 1):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = ResNet19_SNN_model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Accuracy on the test set: {100 * correct / total:.2f}%")
    writer.add_scalar("ACCURACY/Evaluation",(correct / total), epoch) 
print("Finished Training")

  0%|                                                    | 0/1563 [00:00<?, ?it/s]

  0%|                                                      | 0/79 [00:45<?, ?it/s][A[A
100%|█████████████████████████████| 1563/1563 [07:39<00:00,  3.40it/s, loss=0.876]

Epoch 1 took 459.85 seconds
[1,  1564] loss: 0.888
lr: 0.01
Accuracy on the test set: 68.50%





  1%|▌                                         | 1/79 [07:43<10:01:55, 463.01s/it][A[A

  3%|█                                          | 2/79 [07:43<4:04:53, 190.82s/it][A[A

  4%|█▋                                         | 3/79 [07:43<2:11:29, 103.81s/it][A[A

  5%|██▏                                         | 4/79 [07:43<1:18:40, 62.94s/it][A[A

  6%|██▉                                           | 5/79 [07:44<49:45, 40.34s/it][A[A

  8%|███▍                                          | 6/79 [07:44<32:30, 26.72s/it][A[A

  9%|████                                          | 7/79 [07:44<21:41, 18.08s/it][A[A

 10%|████▋                                         | 8/79 [07:44<14:41, 12.41s/it][A[A

 11%|█████▏                                        | 9/79 [07:45<10:03,  8.62s/it][A[A

 13%|█████▋                                       | 10/79 [07:45<06:57,  6.04s/it][A[A

 14%|██████▎                                      | 11/79 [07:45<04:50,  4.28s/it][A[A

 15%|██

Accuracy on the test set: 56.05%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [08:32<00:00,  3.05it/s, loss=0.589][A

Epoch 2 took 512.84 seconds
[2,  1564] loss: 0.673
lr: 0.01
Accuracy on the test set: 76.85%




  1%|▌                                         | 1/79 [08:35<11:10:14, 515.57s/it][A
  3%|█                                          | 2/79 [08:35<4:32:38, 212.45s/it][A
  4%|█▋                                         | 3/79 [08:36<2:26:22, 115.56s/it][A
  5%|██▏                                         | 4/79 [08:36<1:27:32, 70.04s/it][A
  6%|██▉                                           | 5/79 [08:36<55:20, 44.87s/it][A
  8%|███▍                                          | 6/79 [08:36<36:08, 29.70s/it][A
  9%|████                                          | 7/79 [08:37<24:05, 20.08s/it][A
 10%|████▋                                         | 8/79 [08:37<16:17, 13.77s/it][A
 11%|█████▏                                        | 9/79 [08:37<11:07,  9.54s/it][A
 13%|█████▋                                       | 10/79 [08:37<07:40,  6.67s/it][A
 14%|██████▎                                      | 11/79 [08:38<05:20,  4.71s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 70.34%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [08:37<00:00,  3.02it/s, loss=0.372][A

Epoch 3 took 517.21 seconds
[3,  1564] loss: 0.552
lr: 0.01
Accuracy on the test set: 81.26%




  1%|▌                                         | 1/79 [08:40<11:16:20, 520.26s/it][A
  3%|█                                          | 2/79 [08:40<4:35:06, 214.37s/it][A
  4%|█▋                                         | 3/79 [08:40<2:27:41, 116.60s/it][A
  5%|██▏                                         | 4/79 [08:41<1:28:20, 70.67s/it][A
  6%|██▉                                           | 5/79 [08:41<55:50, 45.28s/it][A
  8%|███▍                                          | 6/79 [08:41<36:27, 29.97s/it][A
  9%|████                                          | 7/79 [08:41<24:18, 20.25s/it][A
 10%|████▋                                         | 8/79 [08:42<16:26, 13.89s/it][A
 11%|█████▏                                        | 9/79 [08:42<11:13,  9.63s/it][A
 13%|█████▋                                       | 10/79 [08:42<07:44,  6.73s/it][A
 14%|██████▎                                      | 11/79 [08:42<05:22,  4.75s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 73.97%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [08:46<00:00,  2.97it/s, loss=0.457][A

Epoch 4 took 526.33 seconds
[4,  1564] loss: 0.475
lr: 0.01
Accuracy on the test set: 84.00%




  1%|▌                                         | 1/79 [08:49<11:28:03, 529.28s/it][A
  3%|█                                          | 2/79 [08:49<4:39:52, 218.09s/it][A
  4%|█▋                                         | 3/79 [08:49<2:30:15, 118.62s/it][A
  5%|██▏                                         | 4/79 [08:50<1:29:51, 71.89s/it][A
  6%|██▉                                           | 5/79 [08:50<56:48, 46.06s/it][A
  8%|███▍                                          | 6/79 [08:50<37:05, 30.49s/it][A
  9%|████                                          | 7/79 [08:50<24:43, 20.60s/it][A
 10%|████▋                                         | 8/79 [08:51<16:42, 14.13s/it][A
 11%|█████▏                                        | 9/79 [08:51<11:25,  9.79s/it][A
 13%|█████▋                                       | 10/79 [08:51<07:52,  6.85s/it][A
 14%|██████▎                                      | 11/79 [08:51<05:28,  4.83s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 78.92%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [08:50<00:00,  2.95it/s, loss=0.289][A

Epoch 5 took 530.73 seconds
[5,  1564] loss: 0.420
lr: 0.01
Accuracy on the test set: 86.08%




  1%|▌                                         | 1/79 [08:53<11:33:50, 533.72s/it][A
  3%|█                                          | 2/79 [08:53<4:42:13, 219.92s/it][A
  4%|█▋                                         | 3/79 [08:54<2:31:30, 119.62s/it][A
  5%|██▏                                         | 4/79 [08:54<1:30:37, 72.49s/it][A
  6%|██▉                                           | 5/79 [08:54<57:16, 46.44s/it][A
  8%|███▍                                          | 6/79 [08:55<37:24, 30.74s/it][A
  9%|████                                          | 7/79 [08:55<24:55, 20.77s/it][A
 10%|████▋                                         | 8/79 [08:55<16:51, 14.24s/it][A
 11%|█████▏                                        | 9/79 [08:55<11:30,  9.87s/it][A
 13%|█████▋                                       | 10/79 [08:56<07:56,  6.90s/it][A
 14%|██████▎                                      | 11/79 [08:56<05:30,  4.87s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 75.85%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [09:06<00:00,  2.86it/s, loss=0.358][A

Epoch 6 took 546.50 seconds
[6,  1564] loss: 0.386
lr: 0.01
Accuracy on the test set: 87.25%




  1%|▌                                         | 1/79 [09:09<11:54:53, 549.92s/it][A
  3%|█                                          | 2/79 [09:10<4:50:48, 226.60s/it][A
  4%|█▋                                         | 3/79 [09:10<2:36:07, 123.26s/it][A
  5%|██▏                                         | 4/79 [09:10<1:33:22, 74.71s/it][A
  6%|██▉                                           | 5/79 [09:11<59:01, 47.86s/it][A
  8%|███▍                                          | 6/79 [09:11<38:32, 31.68s/it][A
  9%|████                                          | 7/79 [09:11<25:41, 21.41s/it][A
 10%|████▋                                         | 8/79 [09:11<17:22, 14.68s/it][A
 11%|█████▏                                        | 9/79 [09:12<11:52, 10.17s/it][A
 13%|█████▋                                       | 10/79 [09:12<08:10,  7.12s/it][A
 14%|██████▎                                      | 11/79 [09:12<05:41,  5.02s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 79.68%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [08:51<00:00,  2.94it/s, loss=0.227][A

Epoch 7 took 531.25 seconds
[7,  1564] loss: 0.357
lr: 0.01
Accuracy on the test set: 88.54%




  1%|▌                                         | 1/79 [08:54<11:34:50, 534.49s/it][A
  3%|█                                          | 2/79 [08:54<4:42:38, 220.24s/it][A
  4%|█▋                                         | 3/79 [08:55<2:31:43, 119.79s/it][A
  5%|██▏                                         | 4/79 [08:55<1:30:45, 72.60s/it][A
  6%|██▉                                           | 5/79 [08:55<57:21, 46.51s/it][A
  8%|███▍                                          | 6/79 [08:55<37:27, 30.79s/it][A
  9%|████                                          | 7/79 [08:56<24:57, 20.80s/it][A
 10%|████▋                                         | 8/79 [08:56<16:52, 14.26s/it][A
 11%|█████▏                                        | 9/79 [08:56<11:31,  9.88s/it][A
 13%|█████▋                                       | 10/79 [08:56<07:56,  6.91s/it][A
 14%|██████▎                                      | 11/79 [08:57<05:31,  4.87s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 80.76%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [09:38<00:00,  2.70it/s, loss=0.524][A

Epoch 8 took 578.98 seconds
[8,  1564] loss: 0.330
lr: 0.01
Accuracy on the test set: 89.29%




  1%|▌                                         | 1/79 [09:42<12:36:48, 582.16s/it][A
  3%|█                                          | 2/79 [09:42<5:07:49, 239.87s/it][A
  4%|█▋                                         | 3/79 [09:42<2:45:14, 130.46s/it][A
  5%|██▏                                         | 4/79 [09:42<1:38:49, 79.05s/it][A
  6%|██▊                                         | 5/79 [09:43<1:02:27, 50.64s/it][A
  8%|███▍                                          | 6/79 [09:43<40:46, 33.51s/it][A
  9%|████                                          | 7/79 [09:43<27:09, 22.64s/it][A
 10%|████▋                                         | 8/79 [09:43<18:21, 15.51s/it][A
 11%|█████▏                                        | 9/79 [09:44<12:31, 10.74s/it][A
 13%|█████▋                                       | 10/79 [09:44<08:37,  7.50s/it][A
 14%|██████▎                                      | 11/79 [09:44<05:59,  5.29s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 81.18%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [08:49<00:00,  2.95it/s, loss=0.773][A

Epoch 9 took 529.23 seconds
[9,  1564] loss: 0.310
lr: 0.01
Accuracy on the test set: 90.17%




  1%|▌                                         | 1/79 [08:52<11:32:04, 532.37s/it][A
  3%|█                                          | 2/79 [08:52<4:41:31, 219.36s/it][A
  4%|█▋                                         | 3/79 [08:52<2:31:07, 119.32s/it][A
  5%|██▏                                         | 4/79 [08:53<1:30:23, 72.31s/it][A
  6%|██▉                                           | 5/79 [08:53<57:08, 46.33s/it][A
  8%|███▍                                          | 6/79 [08:53<37:18, 30.66s/it][A
  9%|████                                          | 7/79 [08:53<24:51, 20.72s/it][A
 10%|████▋                                         | 8/79 [08:54<16:48, 14.21s/it][A
 11%|█████▏                                        | 9/79 [08:54<11:29,  9.84s/it][A
 13%|█████▋                                       | 10/79 [08:54<07:55,  6.88s/it][A
 14%|██████▎                                      | 11/79 [08:54<05:30,  4.86s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 63.14%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [08:29<00:00,  3.07it/s, loss=0.285][A

Epoch 10 took 509.41 seconds
[10,  1564] loss: 0.295
lr: 0.01
Accuracy on the test set: 90.68%




  1%|▌                                         | 1/79 [08:32<11:05:56, 512.27s/it][A
  3%|█                                          | 2/79 [08:32<4:30:53, 211.08s/it][A
  4%|█▋                                         | 3/79 [08:32<2:25:26, 114.82s/it][A
  5%|██▏                                         | 4/79 [08:33<1:26:59, 69.59s/it][A
  6%|██▉                                           | 5/79 [08:33<54:59, 44.59s/it][A
  8%|███▍                                          | 6/79 [08:33<35:54, 29.51s/it][A
  9%|████                                          | 7/79 [08:33<23:56, 19.95s/it][A
 10%|████▋                                         | 8/79 [08:34<16:11, 13.68s/it][A
 11%|█████▏                                        | 9/79 [08:34<11:03,  9.48s/it][A
 13%|█████▋                                       | 10/79 [08:34<07:37,  6.63s/it][A
 14%|██████▎                                      | 11/79 [08:34<05:18,  4.68s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 79.12%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [08:29<00:00,  3.07it/s, loss=0.159][A

Epoch 11 took 509.10 seconds
[11,  1564] loss: 0.279
lr: 0.01
Accuracy on the test set: 91.40%




  1%|▌                                         | 1/79 [08:32<11:05:40, 512.06s/it][A
  3%|█                                          | 2/79 [08:32<4:30:47, 211.00s/it][A
  4%|█▋                                         | 3/79 [08:32<2:25:22, 114.77s/it][A
  5%|██▏                                         | 4/79 [08:32<1:26:57, 69.56s/it][A
  6%|██▉                                           | 5/79 [08:33<54:58, 44.57s/it][A
  8%|███▍                                          | 6/79 [08:33<35:53, 29.50s/it][A
  9%|████                                          | 7/79 [08:33<23:55, 19.94s/it][A
 10%|████▋                                         | 8/79 [08:33<16:10, 13.67s/it][A
 11%|█████▏                                        | 9/79 [08:34<11:03,  9.48s/it][A
 13%|█████▋                                       | 10/79 [08:34<07:37,  6.63s/it][A
 14%|██████▎                                      | 11/79 [08:34<05:18,  4.68s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 83.80%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [08:25<00:00,  3.09it/s, loss=0.325][A

Epoch 12 took 505.41 seconds
[12,  1564] loss: 0.257
lr: 0.01
Accuracy on the test set: 92.16%




  1%|▌                                         | 1/79 [08:28<11:00:41, 508.23s/it][A
  3%|█                                          | 2/79 [08:28<4:28:45, 209.42s/it][A
  4%|█▋                                         | 3/79 [08:28<2:24:17, 113.91s/it][A
  5%|██▏                                         | 4/79 [08:28<1:26:18, 69.04s/it][A
  6%|██▉                                           | 5/79 [08:29<54:33, 44.24s/it][A
  8%|███▍                                          | 6/79 [08:29<35:37, 29.28s/it][A
  9%|████                                          | 7/79 [08:29<23:45, 19.79s/it][A
 10%|████▋                                         | 8/79 [08:30<16:03, 13.57s/it][A
 11%|█████▏                                        | 9/79 [08:30<10:58,  9.41s/it][A
 13%|█████▋                                       | 10/79 [08:30<07:34,  6.58s/it][A
 14%|██████▎                                      | 11/79 [08:30<05:16,  4.65s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 72.10%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [08:21<00:00,  3.12it/s, loss=0.532][A

Epoch 13 took 501.46 seconds
[13,  1564] loss: 0.250
lr: 0.01
Accuracy on the test set: 92.44%




  1%|▌                                         | 1/79 [08:24<10:55:46, 504.45s/it][A
  3%|█                                          | 2/79 [08:24<4:26:45, 207.87s/it][A
  4%|█▋                                         | 3/79 [08:24<2:23:13, 113.07s/it][A
  5%|██▏                                         | 4/79 [08:25<1:25:39, 68.53s/it][A
  6%|██▉                                           | 5/79 [08:25<54:09, 43.91s/it][A
  8%|███▍                                          | 6/79 [08:25<35:22, 29.07s/it][A
  9%|████                                          | 7/79 [08:25<23:34, 19.65s/it][A
 10%|████▋                                         | 8/79 [08:26<15:56, 13.48s/it][A
 11%|█████▏                                        | 9/79 [08:26<10:54,  9.34s/it][A
 13%|█████▋                                       | 10/79 [08:26<07:31,  6.54s/it][A
 14%|██████▎                                      | 11/79 [08:27<05:13,  4.61s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 83.24%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [08:20<00:00,  3.12it/s, loss=0.126][A

Epoch 14 took 500.75 seconds
[14,  1564] loss: 0.238
lr: 0.01
Accuracy on the test set: 92.88%




  1%|▌                                         | 1/79 [08:23<10:54:48, 503.70s/it][A
  3%|█                                          | 2/79 [08:23<4:26:21, 207.56s/it][A
  4%|█▋                                         | 3/79 [08:24<2:23:00, 112.90s/it][A
  5%|██▏                                         | 4/79 [08:24<1:25:32, 68.43s/it][A
  6%|██▉                                           | 5/79 [08:24<54:04, 43.85s/it][A
  8%|███▍                                          | 6/79 [08:24<35:18, 29.03s/it][A
  9%|████                                          | 7/79 [08:25<23:32, 19.62s/it][A
 10%|████▋                                         | 8/79 [08:25<15:55, 13.46s/it][A
 11%|█████▏                                        | 9/79 [08:25<10:53,  9.33s/it][A
 13%|█████▋                                       | 10/79 [08:26<07:30,  6.53s/it][A
 14%|██████▎                                      | 11/79 [08:26<05:13,  4.61s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 84.27%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [09:44<00:00,  2.67it/s, loss=0.501][A

Epoch 15 took 584.95 seconds
[15,  1564] loss: 0.226
lr: 0.01
Accuracy on the test set: 93.22%




  1%|▌                                         | 1/79 [09:49<12:46:10, 589.37s/it][A
  3%|█                                          | 2/79 [09:49<5:11:48, 242.96s/it][A
  4%|█▋                                         | 3/79 [09:50<2:47:28, 132.22s/it][A
  5%|██▏                                         | 4/79 [09:50<1:40:14, 80.19s/it][A
  6%|██▊                                         | 5/79 [09:51<1:03:25, 51.42s/it][A
  8%|███▍                                          | 6/79 [09:51<41:28, 34.08s/it][A
  9%|████                                          | 7/79 [09:51<27:41, 23.08s/it][A
 10%|████▋                                         | 8/79 [09:52<18:46, 15.87s/it][A
 11%|█████▏                                        | 9/79 [09:52<12:52, 11.04s/it][A
 13%|█████▋                                       | 10/79 [09:53<08:55,  7.76s/it][A
 14%|██████▎                                      | 11/79 [09:53<06:15,  5.52s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 75.91%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [12:08<00:00,  2.15it/s, loss=0.468][A

Epoch 16 took 728.65 seconds
[16,  1564] loss: 0.218
lr: 0.01
Accuracy on the test set: 93.77%




  1%|▌                                         | 1/79 [12:13<15:53:26, 733.41s/it][A
  3%|█                                          | 2/79 [12:13<6:27:45, 302.15s/it][A
  4%|█▋                                         | 3/79 [12:13<3:28:06, 164.30s/it][A
  5%|██▏                                         | 4/79 [12:14<2:04:26, 99.55s/it][A
  6%|██▊                                         | 5/79 [12:14<1:18:41, 63.80s/it][A
  8%|███▍                                          | 6/79 [12:15<51:33, 42.37s/it][A
  9%|████                                          | 7/79 [12:16<34:30, 28.75s/it][A
 10%|████▋                                         | 8/79 [12:16<23:25, 19.79s/it][A
 11%|█████▏                                        | 9/79 [12:17<16:07, 13.83s/it][A
 13%|█████▋                                       | 10/79 [12:18<11:14,  9.78s/it][A
 14%|██████▎                                      | 11/79 [12:18<07:54,  6.98s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 74.94%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [12:14<00:00,  2.13it/s, loss=0.599][A

Epoch 17 took 734.11 seconds
[17,  1564] loss: 0.216
lr: 0.01
Accuracy on the test set: 93.68%




  1%|▌                                         | 1/79 [12:17<15:58:14, 737.11s/it][A
  3%|█                                          | 2/79 [12:17<6:29:42, 303.67s/it][A
  4%|█▋                                         | 3/79 [12:17<3:29:09, 165.12s/it][A
  5%|██▏                                        | 4/79 [12:17<2:05:02, 100.04s/it][A
  6%|██▊                                         | 5/79 [12:18<1:19:00, 64.06s/it][A
  8%|███▍                                          | 6/79 [12:18<51:32, 42.36s/it][A
  9%|████                                          | 7/79 [12:18<34:19, 28.60s/it][A
 10%|████▋                                         | 8/79 [12:18<23:09, 19.58s/it][A
 11%|█████▏                                        | 9/79 [12:19<15:47, 13.54s/it][A
 13%|█████▋                                       | 10/79 [12:19<10:51,  9.44s/it][A
 14%|██████▎                                      | 11/79 [12:19<07:30,  6.63s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 74.64%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [07:08<00:00,  3.65it/s, loss=0.269][A

Epoch 18 took 428.23 seconds
[18,  1564] loss: 0.210
lr: 0.01
Accuracy on the test set: 94.01%




  1%|▌                                          | 1/79 [07:11<9:20:32, 431.18s/it][A
  3%|█                                          | 2/79 [07:11<3:48:02, 177.70s/it][A
  4%|█▋                                          | 3/79 [07:11<2:02:27, 96.68s/it][A
  5%|██▏                                         | 4/79 [07:11<1:13:15, 58.61s/it][A
  6%|██▉                                           | 5/79 [07:12<46:19, 37.57s/it][A
  8%|███▍                                          | 6/79 [07:12<30:16, 24.88s/it][A
  9%|████                                          | 7/79 [07:12<20:11, 16.83s/it][A
 10%|████▋                                         | 8/79 [07:12<13:40, 11.55s/it][A
 11%|█████▏                                        | 9/79 [07:13<09:21,  8.02s/it][A
 13%|█████▋                                       | 10/79 [07:13<06:28,  5.62s/it][A
 14%|██████▎                                      | 11/79 [07:13<04:30,  3.98s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 82.88%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [07:08<00:00,  3.65it/s, loss=0.311][A

Epoch 19 took 428.38 seconds
[19,  1564] loss: 0.205
lr: 0.01
Accuracy on the test set: 94.24%




  1%|▌                                          | 1/79 [07:11<9:20:59, 431.53s/it][A
  3%|█                                          | 2/79 [07:11<3:48:14, 177.85s/it][A
  4%|█▋                                          | 3/79 [07:12<2:02:34, 96.77s/it][A
  5%|██▏                                         | 4/79 [07:12<1:13:19, 58.67s/it][A
  6%|██▉                                           | 5/79 [07:12<46:22, 37.60s/it][A
  8%|███▍                                          | 6/79 [07:12<30:18, 24.91s/it][A
  9%|████                                          | 7/79 [07:13<20:13, 16.85s/it][A
 10%|████▋                                         | 8/79 [07:13<13:41, 11.57s/it][A
 11%|█████▏                                        | 9/79 [07:13<09:22,  8.03s/it][A
 13%|█████▋                                       | 10/79 [07:13<06:28,  5.63s/it][A
 14%|██████▎                                      | 11/79 [07:14<04:31,  3.99s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 83.14%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [07:22<00:00,  3.53it/s, loss=0.283][A

Epoch 20 took 442.62 seconds
[20,  1564] loss: 0.200
lr: 0.01
Accuracy on the test set: 94.33%




  1%|▌                                          | 1/79 [07:25<9:39:13, 445.56s/it][A
  3%|█                                          | 2/79 [07:25<3:55:38, 183.62s/it][A
  4%|█▋                                          | 3/79 [07:26<2:06:31, 99.89s/it][A
  5%|██▏                                         | 4/79 [07:26<1:15:41, 60.56s/it][A
  6%|██▉                                           | 5/79 [07:26<47:52, 38.81s/it][A
  8%|███▍                                          | 6/79 [07:26<31:16, 25.70s/it][A
  9%|████                                          | 7/79 [07:27<20:51, 17.38s/it][A
 10%|████▋                                         | 8/79 [07:27<14:07, 11.93s/it][A
 11%|█████▏                                        | 9/79 [07:27<09:39,  8.28s/it][A
 13%|█████▋                                       | 10/79 [07:27<06:40,  5.80s/it][A
 14%|██████▎                                      | 11/79 [07:28<04:39,  4.11s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 76.87%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|██████████████████████████████| 1563/1563 [07:34<00:00,  3.44it/s, loss=0.35][A

Epoch 21 took 454.76 seconds
[21,  1564] loss: 0.196
lr: 0.01
Accuracy on the test set: 94.40%




  1%|▌                                          | 1/79 [07:37<9:54:49, 457.55s/it][A
  3%|█                                          | 2/79 [07:37<4:01:58, 188.56s/it][A
  4%|█▋                                         | 3/79 [07:38<2:09:55, 102.58s/it][A
  5%|██▏                                         | 4/79 [07:38<1:17:43, 62.18s/it][A
  6%|██▉                                           | 5/79 [07:38<49:08, 39.85s/it][A
  8%|███▍                                          | 6/79 [07:38<32:06, 26.39s/it][A
  9%|████                                          | 7/79 [07:39<21:24, 17.85s/it][A
 10%|████▋                                         | 8/79 [07:39<14:29, 12.25s/it][A
 11%|█████▏                                        | 9/79 [07:39<09:54,  8.50s/it][A
 13%|█████▋                                       | 10/79 [07:39<06:50,  5.95s/it][A
 14%|██████▎                                      | 11/79 [07:40<04:46,  4.21s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 79.09%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [07:45<00:00,  3.35it/s, loss=0.111][A

Epoch 22 took 465.99 seconds
[22,  1564] loss: 0.193
lr: 0.01
Accuracy on the test set: 94.58%




  1%|▌                                         | 1/79 [07:48<10:09:28, 468.82s/it][A
  3%|█                                          | 2/79 [07:49<4:07:56, 193.20s/it][A
  4%|█▋                                         | 3/79 [07:49<2:13:07, 105.10s/it][A
  5%|██▏                                         | 4/79 [07:49<1:19:38, 63.71s/it][A
  6%|██▉                                           | 5/79 [07:49<50:21, 40.83s/it][A
  8%|███▍                                          | 6/79 [07:50<32:53, 27.03s/it][A
  9%|████                                          | 7/79 [07:50<21:56, 18.28s/it][A
 10%|████▋                                         | 8/79 [07:50<14:50, 12.54s/it][A
 11%|█████▏                                        | 9/79 [07:50<10:09,  8.70s/it][A
 13%|█████▋                                       | 10/79 [07:51<07:00,  6.09s/it][A
 14%|██████▎                                      | 11/79 [07:51<04:52,  4.31s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 83.66%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [07:49<00:00,  3.33it/s, loss=0.272][A

Epoch 23 took 469.68 seconds
[23,  1564] loss: 0.193
lr: 0.01
Accuracy on the test set: 94.51%




  1%|▌                                         | 1/79 [07:52<10:14:24, 472.62s/it][A
  3%|█                                          | 2/79 [07:52<4:09:56, 194.76s/it][A
  4%|█▋                                         | 3/79 [07:53<2:14:11, 105.95s/it][A
  5%|██▏                                         | 4/79 [07:53<1:20:16, 64.22s/it][A
  6%|██▉                                           | 5/79 [07:53<50:45, 41.16s/it][A
  8%|███▍                                          | 6/79 [07:53<33:09, 27.25s/it][A
  9%|████                                          | 7/79 [07:54<22:06, 18.42s/it][A
 10%|████▋                                         | 8/79 [07:54<14:57, 12.64s/it][A
 11%|█████▏                                        | 9/79 [07:54<10:13,  8.77s/it][A
 13%|█████▋                                       | 10/79 [07:54<07:03,  6.14s/it][A
 14%|██████▎                                      | 11/79 [07:55<04:55,  4.34s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 82.82%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [08:02<00:00,  3.24it/s, loss=0.311][A

Epoch 24 took 482.33 seconds
[24,  1564] loss: 0.190
lr: 0.01
Accuracy on the test set: 94.70%




  1%|▌                                         | 1/79 [08:05<10:31:04, 485.44s/it][A
  3%|█                                          | 2/79 [08:05<4:16:44, 200.06s/it][A
  4%|█▋                                         | 3/79 [08:05<2:17:50, 108.83s/it][A
  5%|██▏                                         | 4/79 [08:06<1:22:27, 65.96s/it][A
  6%|██▉                                           | 5/79 [08:06<52:08, 42.27s/it][A
  8%|███▍                                          | 6/79 [08:06<34:02, 27.99s/it][A
  9%|████                                          | 7/79 [08:07<22:42, 18.92s/it][A
 10%|████▋                                         | 8/79 [08:07<15:21, 12.98s/it][A
 11%|█████▏                                        | 9/79 [08:07<10:30,  9.01s/it][A
 13%|█████▋                                       | 10/79 [08:07<07:15,  6.30s/it][A
 14%|██████▎                                      | 11/79 [08:08<05:02,  4.45s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 75.94%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [07:58<00:00,  3.27it/s, loss=0.183][A

Epoch 25 took 478.24 seconds
[25,  1564] loss: 0.181
lr: 0.01
Accuracy on the test set: 94.93%




  1%|▌                                         | 1/79 [08:01<10:25:33, 481.20s/it][A
  3%|█                                          | 2/79 [08:01<4:14:28, 198.30s/it][A
  4%|█▋                                         | 3/79 [08:01<2:16:37, 107.87s/it][A
  5%|██▏                                         | 4/79 [08:01<1:21:43, 65.38s/it][A
  6%|██▉                                           | 5/79 [08:02<51:40, 41.90s/it][A
  8%|███▍                                          | 6/79 [08:02<33:45, 27.74s/it][A
  9%|████                                          | 7/79 [08:02<22:30, 18.75s/it][A
 10%|████▋                                         | 8/79 [08:03<15:13, 12.87s/it][A
 11%|█████▏                                        | 9/79 [08:03<10:24,  8.92s/it][A
 13%|█████▋                                       | 10/79 [08:03<07:11,  6.25s/it][A
 14%|██████▎                                      | 11/79 [08:03<05:00,  4.41s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 78.39%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [08:01<00:00,  3.25it/s, loss=0.588][A

Epoch 26 took 481.21 seconds
[26,  1564] loss: 0.182
lr: 0.01
Accuracy on the test set: 94.91%




  1%|▌                                         | 1/79 [08:04<10:29:21, 484.13s/it][A
  3%|█                                          | 2/79 [08:04<4:16:01, 199.50s/it][A
  4%|█▋                                         | 3/79 [08:04<2:17:27, 108.52s/it][A
  5%|██▏                                         | 4/79 [08:04<1:22:13, 65.78s/it][A
  6%|██▉                                           | 5/79 [08:05<51:59, 42.15s/it][A
  8%|███▍                                          | 6/79 [08:05<33:57, 27.91s/it][A
  9%|████                                          | 7/79 [08:05<22:38, 18.87s/it][A
 10%|████▋                                         | 8/79 [08:05<15:18, 12.94s/it][A
 11%|█████▏                                        | 9/79 [08:06<10:28,  8.98s/it][A
 13%|█████▋                                       | 10/79 [08:06<07:13,  6.28s/it][A
 14%|██████▎                                      | 11/79 [08:06<05:01,  4.44s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 67.01%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|████████████████████████████| 1563/1563 [08:04<00:00,  3.23it/s, loss=0.0895][A

Epoch 27 took 484.61 seconds
[27,  1564] loss: 0.178
lr: 0.01
Accuracy on the test set: 95.15%




  1%|▌                                         | 1/79 [08:07<10:34:08, 487.80s/it][A
  3%|█                                          | 2/79 [08:08<4:17:58, 201.01s/it][A
  4%|█▋                                         | 3/79 [08:08<2:18:30, 109.35s/it][A
  5%|██▏                                         | 4/79 [08:08<1:22:50, 66.28s/it][A
  6%|██▉                                           | 5/79 [08:08<52:22, 42.47s/it][A
  8%|███▍                                          | 6/79 [08:09<34:12, 28.12s/it][A
  9%|████                                          | 7/79 [08:09<22:48, 19.01s/it][A
 10%|████▋                                         | 8/79 [08:09<15:25, 13.04s/it][A
 11%|█████▏                                        | 9/79 [08:09<10:33,  9.04s/it][A
 13%|█████▋                                       | 10/79 [08:10<07:16,  6.33s/it][A
 14%|██████▎                                      | 11/79 [08:10<05:04,  4.47s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 84.44%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [08:01<00:00,  3.25it/s, loss=0.445][A

Epoch 28 took 481.04 seconds
[28,  1564] loss: 0.180
lr: 0.01
Accuracy on the test set: 94.97%




  1%|▌                                         | 1/79 [08:04<10:29:16, 484.05s/it][A
  3%|█                                          | 2/79 [08:04<4:15:59, 199.47s/it][A
  4%|█▋                                         | 3/79 [08:04<2:17:26, 108.51s/it][A
  5%|██▏                                         | 4/79 [08:04<1:22:12, 65.77s/it][A
  6%|██▉                                           | 5/79 [08:05<51:58, 42.15s/it][A
  8%|███▍                                          | 6/79 [08:05<33:56, 27.90s/it][A
  9%|████                                          | 7/79 [08:05<22:38, 18.86s/it][A
 10%|████▋                                         | 8/79 [08:05<15:18, 12.94s/it][A
 11%|█████▏                                        | 9/79 [08:06<10:28,  8.98s/it][A
 13%|█████▋                                       | 10/79 [08:06<07:13,  6.28s/it][A
 14%|██████▎                                      | 11/79 [08:06<05:01,  4.44s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 83.51%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [07:54<00:00,  3.30it/s, loss=0.649][A

Epoch 29 took 474.10 seconds
[29,  1564] loss: 0.172
lr: 0.01
Accuracy on the test set: 95.20%




  1%|▌                                         | 1/79 [07:57<10:20:06, 477.01s/it][A
  3%|█                                          | 2/79 [07:57<4:12:15, 196.57s/it][A
  4%|█▋                                         | 3/79 [07:57<2:15:26, 106.93s/it][A
  5%|██▏                                         | 4/79 [07:57<1:21:01, 64.82s/it][A
  6%|██▉                                           | 5/79 [07:58<51:13, 41.54s/it][A
  8%|███▍                                          | 6/79 [07:58<33:27, 27.50s/it][A
  9%|████                                          | 7/79 [07:58<22:18, 18.59s/it][A
 10%|████▋                                         | 8/79 [07:58<15:05, 12.76s/it][A
 11%|█████▏                                        | 9/79 [07:59<10:19,  8.85s/it][A
 13%|█████▋                                       | 10/79 [07:59<07:07,  6.20s/it][A
 14%|██████▎                                      | 11/79 [07:59<04:57,  4.38s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 76.52%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [08:51<00:00,  2.94it/s, loss=0.393][A

Epoch 30 took 531.20 seconds
[30,  1564] loss: 0.169
lr: 0.003
Accuracy on the test set: 95.42%




  1%|▌                                         | 1/79 [08:54<11:35:17, 534.84s/it][A
  3%|█                                          | 2/79 [08:55<4:42:52, 220.42s/it][A
  4%|█▋                                         | 3/79 [08:55<2:31:53, 119.91s/it][A
  5%|██▏                                         | 4/79 [08:55<1:30:52, 72.70s/it][A
  6%|██▉                                           | 5/79 [08:56<57:29, 46.61s/it][A
  8%|███▍                                          | 6/79 [08:56<37:34, 30.88s/it][A
  9%|████                                          | 7/79 [08:56<25:05, 20.90s/it][A
 10%|████▋                                         | 8/79 [08:57<17:00, 14.37s/it][A
 11%|█████▏                                        | 9/79 [08:57<11:40, 10.00s/it][A
 13%|█████▋                                       | 10/79 [08:58<08:05,  7.04s/it][A
 14%|██████▎                                      | 11/79 [08:58<05:40,  5.01s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 79.35%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|████████████████████████████| 1563/1563 [09:12<00:00,  2.83it/s, loss=0.0523][A

Epoch 31 took 552.40 seconds
[31,  1564] loss: 0.055
lr: 0.003
Accuracy on the test set: 99.29%




  1%|▌                                         | 1/79 [09:15<12:02:23, 555.68s/it][A
  3%|█                                          | 2/79 [09:15<4:53:50, 228.97s/it][A
  4%|█▋                                         | 3/79 [09:16<2:37:44, 124.54s/it][A
  5%|██▏                                         | 4/79 [09:16<1:34:20, 75.47s/it][A
  6%|██▉                                           | 5/79 [09:16<59:37, 48.35s/it][A
  8%|███▍                                          | 6/79 [09:16<38:55, 32.00s/it][A
  9%|████                                          | 7/79 [09:17<25:56, 21.62s/it][A
 10%|████▋                                         | 8/79 [09:17<17:32, 14.82s/it][A
 11%|█████▏                                        | 9/79 [09:17<11:58, 10.27s/it][A
 13%|█████▋                                       | 10/79 [09:18<08:15,  7.18s/it][A
 14%|██████▎                                      | 11/79 [09:18<05:44,  5.06s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 90.84%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|████████████████████████████| 1563/1563 [09:01<00:00,  2.88it/s, loss=0.0149][A

Epoch 32 took 541.84 seconds
[32,  1564] loss: 0.031
lr: 0.003
Accuracy on the test set: 99.91%




  1%|▌                                         | 1/79 [09:04<11:48:06, 544.70s/it][A
  3%|█                                          | 2/79 [09:04<4:48:01, 224.44s/it][A
  4%|█▋                                         | 3/79 [09:05<2:34:37, 122.07s/it][A
  5%|██▏                                         | 4/79 [09:05<1:32:28, 73.98s/it][A
  6%|██▉                                           | 5/79 [09:05<58:27, 47.40s/it][A
  8%|███▍                                          | 6/79 [09:05<38:09, 31.37s/it][A
  9%|████                                          | 7/79 [09:06<25:26, 21.20s/it][A
 10%|████▋                                         | 8/79 [09:06<17:11, 14.53s/it][A
 11%|█████▏                                        | 9/79 [09:06<11:44, 10.07s/it][A
 13%|█████▋                                       | 10/79 [09:07<08:05,  7.04s/it][A
 14%|██████▎                                      | 11/79 [09:07<05:37,  4.96s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 91.07%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|████████████████████████████| 1563/1563 [08:37<00:00,  3.02it/s, loss=0.0641][A

Epoch 33 took 517.83 seconds
[33,  1564] loss: 0.028
lr: 0.003
Accuracy on the test set: 99.98%




  1%|▌                                         | 1/79 [08:40<11:16:59, 520.77s/it][A
  3%|█                                          | 2/79 [08:41<4:35:23, 214.59s/it][A
  4%|█▋                                         | 3/79 [08:41<2:27:50, 116.72s/it][A
  5%|██▏                                         | 4/79 [08:41<1:28:25, 70.74s/it][A
  6%|██▉                                           | 5/79 [08:41<55:53, 45.32s/it][A
  8%|███▍                                          | 6/79 [08:42<36:30, 30.00s/it][A
  9%|████                                          | 7/79 [08:42<24:19, 20.28s/it][A
 10%|████▋                                         | 8/79 [08:42<16:27, 13.90s/it][A
 11%|█████▏                                        | 9/79 [08:42<11:14,  9.64s/it][A
 13%|█████▋                                       | 10/79 [08:43<07:45,  6.74s/it][A
 14%|██████▎                                      | 11/79 [08:43<05:23,  4.76s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 91.31%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|████████████████████████████| 1563/1563 [08:33<00:00,  3.05it/s, loss=0.0364][A

Epoch 34 took 513.18 seconds
[34,  1564] loss: 0.028
lr: 0.003
Accuracy on the test set: 99.99%




  1%|▌                                         | 1/79 [08:36<11:11:04, 516.21s/it][A
  3%|█                                          | 2/79 [08:36<4:32:58, 212.71s/it][A
  4%|█▋                                         | 3/79 [08:36<2:26:33, 115.70s/it][A
  5%|██▏                                         | 4/79 [08:36<1:27:39, 70.13s/it][A
  6%|██▉                                           | 5/79 [08:37<55:24, 44.93s/it][A
  8%|███▍                                          | 6/79 [08:37<36:11, 29.74s/it][A
  9%|████                                          | 7/79 [08:37<24:07, 20.10s/it][A
 10%|████▋                                         | 8/79 [08:38<16:18, 13.78s/it][A
 11%|█████▏                                        | 9/79 [08:38<11:08,  9.56s/it][A
 13%|█████▋                                       | 10/79 [08:38<07:41,  6.68s/it][A
 14%|██████▎                                      | 11/79 [08:38<05:20,  4.72s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 91.24%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|████████████████████████████| 1563/1563 [08:05<00:00,  3.22it/s, loss=0.0647][A

Epoch 35 took 485.52 seconds
[35,  1564] loss: 0.028
lr: 0.003
Accuracy on the test set: 99.99%




  1%|▌                                         | 1/79 [08:08<10:35:01, 488.48s/it][A
  3%|█                                          | 2/79 [08:08<4:18:19, 201.29s/it][A
  4%|█▋                                         | 3/79 [08:08<2:18:41, 109.50s/it][A
  5%|██▏                                         | 4/79 [08:09<1:22:57, 66.37s/it][A
  6%|██▉                                           | 5/79 [08:09<52:27, 42.53s/it][A
  8%|███▍                                          | 6/79 [08:09<34:15, 28.16s/it][A
  9%|████                                          | 7/79 [08:10<22:50, 19.04s/it][A
 10%|████▋                                         | 8/79 [08:10<15:27, 13.06s/it][A
 11%|█████▏                                        | 9/79 [08:10<10:33,  9.06s/it][A
 13%|█████▋                                       | 10/79 [08:10<07:17,  6.34s/it][A
 14%|██████▎                                      | 11/79 [08:11<05:04,  4.48s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 90.90%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|████████████████████████████| 1563/1563 [07:51<00:00,  3.31it/s, loss=0.0699][A

Epoch 36 took 471.51 seconds
[36,  1564] loss: 0.027
lr: 0.003
Accuracy on the test set: 99.99%




  1%|▌                                         | 1/79 [07:54<10:16:46, 474.45s/it][A
  3%|█                                          | 2/79 [07:54<4:10:54, 195.51s/it][A
  4%|█▋                                         | 3/79 [07:54<2:14:43, 106.36s/it][A
  5%|██▏                                         | 4/79 [07:55<1:20:35, 64.47s/it][A
  6%|██▉                                           | 5/79 [07:55<50:57, 41.31s/it][A
  8%|███▍                                          | 6/79 [07:55<33:16, 27.35s/it][A
  9%|████                                          | 7/79 [07:55<22:11, 18.50s/it][A
 10%|████▋                                         | 8/79 [07:56<15:00, 12.69s/it][A
 11%|█████▏                                        | 9/79 [07:56<10:16,  8.80s/it][A
 13%|█████▋                                       | 10/79 [07:56<07:05,  6.16s/it][A
 14%|██████▎                                      | 11/79 [07:57<04:56,  4.36s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 91.32%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [08:03<00:00,  3.23it/s, loss=0.044][A

Epoch 37 took 483.53 seconds
[37,  1564] loss: 0.028
lr: 0.003
Accuracy on the test set: 100.00%




  1%|▌                                         | 1/79 [08:06<10:32:24, 486.47s/it][A
  3%|█                                          | 2/79 [08:06<4:17:15, 200.46s/it][A
  4%|█▋                                         | 3/79 [08:06<2:18:07, 109.05s/it][A
  5%|██▏                                         | 4/79 [08:07<1:22:37, 66.10s/it][A
  6%|██▉                                           | 5/79 [08:07<52:14, 42.35s/it][A
  8%|███▍                                          | 6/79 [08:07<34:07, 28.04s/it][A
  9%|████                                          | 7/79 [08:08<22:45, 18.96s/it][A
 10%|████▋                                         | 8/79 [08:08<15:23, 13.01s/it][A
 11%|█████▏                                        | 9/79 [08:08<10:31,  9.02s/it][A
 13%|█████▋                                       | 10/79 [08:08<07:15,  6.32s/it][A
 14%|██████▎                                      | 11/79 [08:09<05:03,  4.46s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 91.10%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [07:48<00:00,  3.34it/s, loss=0.051][A

Epoch 38 took 468.64 seconds
[38,  1564] loss: 0.028
lr: 0.003
Accuracy on the test set: 100.00%




  1%|▌                                         | 1/79 [07:51<10:13:12, 471.70s/it][A
  3%|█                                          | 2/79 [07:51<4:09:27, 194.38s/it][A
  4%|█▋                                         | 3/79 [07:52<2:13:56, 105.74s/it][A
  5%|██▏                                         | 4/79 [07:52<1:20:07, 64.10s/it][A
  6%|██▉                                           | 5/79 [07:52<50:39, 41.08s/it][A
  8%|███▍                                          | 6/79 [07:52<33:05, 27.20s/it][A
  9%|████                                          | 7/79 [07:53<22:04, 18.39s/it][A
 10%|████▋                                         | 8/79 [07:53<14:55, 12.62s/it][A
 11%|█████▏                                        | 9/79 [07:53<10:12,  8.75s/it][A
 13%|█████▋                                       | 10/79 [07:54<07:03,  6.13s/it][A
 14%|██████▎                                      | 11/79 [07:54<04:54,  4.33s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 91.14%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|████████████████████████████| 1563/1563 [08:04<00:00,  3.23it/s, loss=0.0281][A

Epoch 39 took 484.26 seconds
[39,  1564] loss: 0.027
lr: 0.003
Accuracy on the test set: 100.00%




  1%|▌                                         | 1/79 [08:07<10:33:21, 487.20s/it][A
  3%|█                                          | 2/79 [08:07<4:17:38, 200.76s/it][A
  4%|█▋                                         | 3/79 [08:07<2:18:19, 109.21s/it][A
  5%|██▏                                         | 4/79 [08:07<1:22:44, 66.20s/it][A
  6%|██▉                                           | 5/79 [08:08<52:18, 42.42s/it][A
  8%|███▍                                          | 6/79 [08:08<34:10, 28.08s/it][A
  9%|████                                          | 7/79 [08:08<22:46, 18.99s/it][A
 10%|████▋                                         | 8/79 [08:09<15:24, 13.02s/it][A
 11%|█████▏                                        | 9/79 [08:09<10:32,  9.03s/it][A
 13%|█████▋                                       | 10/79 [08:09<07:16,  6.32s/it][A
 14%|██████▎                                      | 11/79 [08:09<05:03,  4.47s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 91.11%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [07:44<00:00,  3.36it/s, loss=0.128][A

Epoch 40 took 464.90 seconds
[40,  1564] loss: 0.027
lr: 0.003
Accuracy on the test set: 100.00%




  1%|▌                                         | 1/79 [07:47<10:08:21, 467.96s/it][A
  3%|█                                          | 2/79 [07:48<4:07:28, 192.84s/it][A
  4%|█▋                                         | 3/79 [07:48<2:12:52, 104.91s/it][A
  5%|██▏                                         | 4/79 [07:48<1:19:29, 63.59s/it][A
  6%|██▉                                           | 5/79 [07:49<50:15, 40.75s/it][A
  8%|███▍                                          | 6/79 [07:49<32:49, 26.99s/it][A
  9%|████                                          | 7/79 [07:49<21:53, 18.25s/it][A
 10%|████▋                                         | 8/79 [07:49<14:48, 12.52s/it][A
 11%|█████▏                                        | 9/79 [07:50<10:08,  8.69s/it][A
 13%|█████▋                                       | 10/79 [07:50<06:59,  6.08s/it][A
 14%|██████▎                                      | 11/79 [07:50<04:52,  4.30s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 90.18%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|████████████████████████████| 1563/1563 [07:42<00:00,  3.38it/s, loss=0.0719][A

Epoch 41 took 462.87 seconds
[41,  1564] loss: 0.027
lr: 0.003
Accuracy on the test set: 99.99%




  1%|▌                                         | 1/79 [07:45<10:05:40, 465.90s/it][A
  3%|█                                          | 2/79 [07:46<4:06:23, 192.00s/it][A
  4%|█▋                                         | 3/79 [07:46<2:12:17, 104.44s/it][A
  5%|██▏                                         | 4/79 [07:46<1:19:08, 63.31s/it][A
  6%|██▉                                           | 5/79 [07:46<50:02, 40.57s/it][A
  8%|███▍                                          | 6/79 [07:47<32:41, 26.87s/it][A
  9%|████                                          | 7/79 [07:47<21:48, 18.17s/it][A
 10%|████▋                                         | 8/79 [07:47<14:45, 12.47s/it][A
 11%|█████▏                                        | 9/79 [07:47<10:05,  8.65s/it][A
 13%|█████▋                                       | 10/79 [07:48<06:58,  6.06s/it][A
 14%|██████▎                                      | 11/79 [07:48<04:51,  4.28s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 89.99%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|████████████████████████████| 1563/1563 [07:49<00:00,  3.33it/s, loss=0.0482][A

Epoch 42 took 469.85 seconds
[42,  1564] loss: 0.027
lr: 0.003
Accuracy on the test set: 100.00%




  1%|▌                                         | 1/79 [07:52<10:14:48, 472.93s/it][A
  3%|█                                          | 2/79 [07:53<4:10:06, 194.89s/it][A
  4%|█▋                                         | 3/79 [07:53<2:14:17, 106.02s/it][A
  5%|██▏                                         | 4/79 [07:53<1:20:19, 64.27s/it][A
  6%|██▉                                           | 5/79 [07:53<50:47, 41.18s/it][A
  8%|███▍                                          | 6/79 [07:54<33:10, 27.27s/it][A
  9%|████                                          | 7/79 [07:54<22:07, 18.44s/it][A
 10%|████▋                                         | 8/79 [07:54<14:58, 12.65s/it][A
 11%|█████▏                                        | 9/79 [07:55<10:14,  8.78s/it][A
 13%|█████▋                                       | 10/79 [07:55<07:04,  6.15s/it][A
 14%|██████▎                                      | 11/79 [07:55<04:55,  4.34s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 90.67%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [07:43<00:00,  3.37it/s, loss=0.027][A

Epoch 43 took 463.78 seconds
[43,  1564] loss: 0.027
lr: 0.003
Accuracy on the test set: 100.00%




  1%|▌                                         | 1/79 [07:46<10:06:47, 466.77s/it][A
  3%|█                                          | 2/79 [07:47<4:06:51, 192.35s/it][A
  4%|█▋                                         | 3/79 [07:47<2:12:32, 104.64s/it][A
  5%|██▏                                         | 4/79 [07:47<1:19:17, 63.43s/it][A
  6%|██▉                                           | 5/79 [07:47<50:08, 40.65s/it][A
  8%|███▍                                          | 6/79 [07:48<32:44, 26.92s/it][A
  9%|████                                          | 7/79 [07:48<21:50, 18.20s/it][A
 10%|████▋                                         | 8/79 [07:48<14:46, 12.49s/it][A
 11%|█████▏                                        | 9/79 [07:48<10:06,  8.66s/it][A
 13%|█████▋                                       | 10/79 [07:49<06:58,  6.07s/it][A
 14%|██████▎                                      | 11/79 [07:49<04:51,  4.29s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 90.85%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [07:44<00:00,  3.36it/s, loss=0.039][A

Epoch 44 took 464.96 seconds
[44,  1564] loss: 0.027
lr: 0.003
Accuracy on the test set: 100.00%




  1%|▌                                         | 1/79 [07:48<10:08:27, 468.04s/it][A
  3%|█                                          | 2/79 [07:48<4:07:31, 192.88s/it][A
  4%|█▋                                         | 3/79 [07:48<2:12:54, 104.92s/it][A
  5%|██▏                                         | 4/79 [07:48<1:19:30, 63.60s/it][A
  6%|██▉                                           | 5/79 [07:49<50:16, 40.76s/it][A
  8%|███▍                                          | 6/79 [07:49<32:50, 26.99s/it][A
  9%|████                                          | 7/79 [07:49<21:53, 18.25s/it][A
 10%|████▋                                         | 8/79 [07:49<14:49, 12.52s/it][A
 11%|█████▏                                        | 9/79 [07:50<10:08,  8.69s/it][A
 13%|█████▋                                       | 10/79 [07:50<06:59,  6.08s/it][A
 14%|██████▎                                      | 11/79 [07:50<04:52,  4.30s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 90.63%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|████████████████████████████| 1563/1563 [07:45<00:00,  3.36it/s, loss=0.0397][A

Epoch 45 took 465.23 seconds
[45,  1564] loss: 0.028
lr: 0.003
Accuracy on the test set: 99.99%




  1%|▌                                         | 1/79 [07:48<10:08:47, 468.30s/it][A
  3%|█                                          | 2/79 [07:48<4:07:39, 192.99s/it][A
  4%|█▋                                         | 3/79 [07:48<2:12:58, 104.98s/it][A
  5%|██▏                                         | 4/79 [07:49<1:19:32, 63.64s/it][A
  6%|██▉                                           | 5/79 [07:49<50:17, 40.78s/it][A
  8%|███▍                                          | 6/79 [07:49<32:51, 27.00s/it][A
  9%|████                                          | 7/79 [07:49<21:54, 18.26s/it][A
 10%|████▋                                         | 8/79 [07:50<14:49, 12.53s/it][A
 11%|█████▏                                        | 9/79 [07:50<10:08,  8.69s/it][A
 13%|█████▋                                       | 10/79 [07:50<07:00,  6.09s/it][A
 14%|██████▎                                      | 11/79 [07:50<04:52,  4.30s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 89.58%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|█████████████████████████████| 1563/1563 [07:46<00:00,  3.35it/s, loss=0.172][A

Epoch 46 took 466.83 seconds
[46,  1564] loss: 0.164
lr: 0.003
Accuracy on the test set: 96.16%




  1%|▌                                         | 1/79 [07:49<10:10:48, 469.85s/it][A
  3%|█                                          | 2/79 [07:50<4:08:28, 193.62s/it][A
  4%|█▋                                         | 3/79 [07:50<2:13:24, 105.33s/it][A
  5%|██▏                                         | 4/79 [07:50<1:19:48, 63.85s/it][A
  6%|██▉                                           | 5/79 [07:50<50:27, 40.91s/it][A
  8%|███▍                                          | 6/79 [07:51<32:57, 27.09s/it][A
  9%|████                                          | 7/79 [07:51<21:58, 18.32s/it][A
 10%|████▋                                         | 8/79 [07:51<14:52, 12.57s/it][A
 11%|█████▏                                        | 9/79 [07:51<10:10,  8.72s/it][A
 13%|█████▋                                       | 10/79 [07:52<07:01,  6.11s/it][A
 14%|██████▎                                      | 11/79 [07:52<04:53,  4.32s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 85.31%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|████████████████████████████| 1563/1563 [07:37<00:00,  3.42it/s, loss=0.0793][A

Epoch 47 took 457.24 seconds
[47,  1564] loss: 0.115
lr: 0.003
Accuracy on the test set: 97.54%




  1%|▌                                          | 1/79 [07:40<9:58:19, 460.25s/it][A
  3%|█                                          | 2/79 [07:40<4:03:24, 189.67s/it][A
  4%|█▋                                         | 3/79 [07:40<2:10:41, 103.18s/it][A
  5%|██▏                                         | 4/79 [07:41<1:18:10, 62.55s/it][A
  6%|██▉                                           | 5/79 [07:41<49:26, 40.08s/it][A
  8%|███▍                                          | 6/79 [07:41<32:17, 26.54s/it][A
  9%|████                                          | 7/79 [07:41<21:32, 17.95s/it][A
 10%|████▋                                         | 8/79 [07:42<14:34, 12.32s/it][A
 11%|█████▏                                        | 9/79 [07:42<09:58,  8.55s/it][A
 13%|█████▋                                       | 10/79 [07:42<06:53,  5.99s/it][A
 14%|██████▎                                      | 11/79 [07:42<04:47,  4.23s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 84.22%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|████████████████████████████| 1563/1563 [07:39<00:00,  3.40it/s, loss=0.0347][A

Epoch 48 took 459.92 seconds
[48,  1564] loss: 0.082
lr: 0.003
Accuracy on the test set: 98.44%




  1%|▌                                         | 1/79 [07:42<10:01:48, 462.93s/it][A
  3%|█                                          | 2/79 [07:43<4:04:49, 190.77s/it][A
  4%|█▋                                         | 3/79 [07:43<2:11:26, 103.78s/it][A
  5%|██▏                                         | 4/79 [07:43<1:18:38, 62.91s/it][A
  6%|██▉                                           | 5/79 [07:43<49:43, 40.32s/it][A
  8%|███▍                                          | 6/79 [07:44<32:28, 26.70s/it][A
  9%|████                                          | 7/79 [07:44<21:39, 18.05s/it][A
 10%|████▋                                         | 8/79 [07:44<14:39, 12.39s/it][A
 11%|█████▏                                        | 9/79 [07:44<10:01,  8.59s/it][A
 13%|█████▋                                       | 10/79 [07:45<06:55,  6.02s/it][A
 14%|██████▎                                      | 11/79 [07:45<04:49,  4.26s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 86.60%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|████████████████████████████| 1563/1563 [07:56<00:00,  3.28it/s, loss=0.0942][A

Epoch 49 took 476.01 seconds
[49,  1564] loss: 0.067
lr: 0.003
Accuracy on the test set: 98.86%




  1%|▌                                         | 1/79 [07:59<10:22:43, 479.01s/it][A
  3%|█                                          | 2/79 [07:59<4:13:19, 197.39s/it][A
  4%|█▋                                         | 3/79 [07:59<2:16:00, 107.38s/it][A
  5%|██▏                                         | 4/79 [07:59<1:21:21, 65.09s/it][A
  6%|██▉                                           | 5/79 [08:00<51:26, 41.71s/it][A
  8%|███▍                                          | 6/79 [08:00<33:35, 27.62s/it][A
  9%|████                                          | 7/79 [08:00<22:24, 18.67s/it][A
 10%|████▋                                         | 8/79 [08:00<15:09, 12.81s/it][A
 11%|█████▏                                        | 9/79 [08:01<10:21,  8.89s/it][A
 13%|█████▋                                       | 10/79 [08:01<07:09,  6.22s/it][A
 14%|██████▎                                      | 11/79 [08:01<04:58,  4.40s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 85.05%


  0%|                                                    | 0/1563 [00:00<?, ?it/s]
100%|████████████████████████████| 1563/1563 [08:04<00:00,  3.23it/s, loss=0.0477][A

Epoch 50 took 484.31 seconds
[50,  1564] loss: 0.056
lr: 0.003
Accuracy on the test set: 99.19%




  1%|▌                                         | 1/79 [08:07<10:33:29, 487.30s/it][A
  3%|█                                          | 2/79 [08:07<4:17:42, 200.81s/it][A
  4%|█▋                                         | 3/79 [08:07<2:18:21, 109.23s/it][A
  5%|██▏                                         | 4/79 [08:08<1:22:45, 66.21s/it][A
  6%|██▉                                           | 5/79 [08:08<52:19, 42.43s/it][A
  8%|███▍                                          | 6/79 [08:08<34:10, 28.09s/it][A
  9%|████                                          | 7/79 [08:08<22:47, 18.99s/it][A
 10%|████▋                                         | 8/79 [08:09<15:24, 13.03s/it][A
 11%|█████▏                                        | 9/79 [08:09<10:32,  9.03s/it][A
 13%|█████▋                                       | 10/79 [08:09<07:16,  6.32s/it][A
 14%|██████▎                                      | 11/79 [08:09<05:03,  4.47s/it][A
 15%|██████▊                                      | 

Accuracy on the test set: 85.77%
Finished Training



