# LR & DA: Learning-Rate Scheduling and Data Augmentation (ResNet18 on CIFAR-10)

In [1]:
# Enable IPython's autoreload extension in mode 2 so that imported modules
# (such as the project-local `main`) are reloaded before each cell executes.
%load_ext autoreload
%autoreload 2

In [2]:
from main import CIFAR10_dataset, CIFAR10_trainer
import torch
import torchvision
import torch.nn as nn
from tqdm import tqdm
import multiprocessing
import torch.optim as optim
import torch.nn.functional as  F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms

In [3]:
# Training-time data augmentation: a chain of random geometric and colour
# perturbations applied to each image, followed by conversion to a tensor.
da_train = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ColorJitter(
            brightness=0.2,
            contrast=0.2,
            saturation=0.2,
            hue=0.1,
        ),
        transforms.RandomRotation(degrees=15),
        transforms.RandomGrayscale(p=0.1),
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
        transforms.ToTensor(),
    ]
)

# Only the training split receives the augmentation pipeline. The test split
# is built without an explicit transform, so it relies on whatever
# CIFAR10_dataset does by default (defined in main.py, not visible here).
train_dataset = CIFAR10_dataset(partition="train", transform=da_train)
test_dataset = CIFAR10_dataset(partition="test")

####################################################################
# DataLoaders
####################################################################

batch_size = 150
# Leave one CPU core free for the main process.
num_workers = multiprocessing.cpu_count() - 1
print("Num workers", num_workers)

# Training batches are reshuffled every epoch; test batches keep a fixed order.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
)


Loading CIFAR10  train  Dataset...
Files already downloaded and verified
	Total Len.:  50000 
 --------------------------------------------------

Loading CIFAR10  test  Dataset...
Files already downloaded and verified
	Total Len.:  10000 
 --------------------------------------------------
Num workers 11


In [4]:
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, each followed by BatchNorm.

    The block computes ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut)``,
    where ``shortcut`` is ``x`` itself or ``downsample(x)`` when a
    ``downsample`` module is supplied.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by both convolutions.
        stride: Stride of the first convolution (the second always uses 1).
        downsample: Optional module applied to the input so that the shortcut
            matches the shape of the main branch; ``None`` keeps the input
            unchanged.
    """

    # Channel expansion factor of the block; it is 1 for ResNet18/34.
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super().__init__()
        # Settings shared by both 3x3 convolutions of the block.
        conv3x3 = dict(kernel_size=3, padding=1, bias=False)

        self.conv1 = nn.Conv2d(in_channels, out_channels, stride=stride, **conv3x3)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, stride=1, **conv3x3)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        # Used to adjust the shortcut's dimensions when needed.
        self.downsample = downsample

    def forward(self, x):
        """Apply the residual block to ``x`` and return the activated sum."""
        # Shortcut branch: identity unless a projection was provided.
        shortcut = x if self.downsample is None else self.downsample(x)

        # Main branch: conv -> BN -> ReLU -> conv -> BN.
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Residual connection followed by the final activation.
        out += shortcut
        return self.relu(out)

class ResNet(nn.Module):
    """ResNet classifier: a convolutional stem, four residual stages and a linear head.

    Args:
        block: Residual block class. It must expose an integer ``expansion``
            class attribute and be constructible as
            ``block(in_channels, out_channels, stride, downsample)``.
        layers: Sequence of four integers giving the number of blocks in each
            of the four residual stages.
        num_classes: Number of outputs of the final linear layer.
        stem_stride: Stride of the stem convolution. Defaults to 2, the value
            that was previously hard-coded.
        stem_padding: Zero padding of the stem convolution. Defaults to 3, the
            value that was previously hard-coded. Note that with the 3x3 stem
            kernel this is larger than the "same" padding of 1: a 32x32 input
            becomes 18x18 and the first output row/column is computed from
            padding only. Pass ``stem_stride=1, stem_padding=1`` for a stem
            that preserves the spatial size of small (e.g. 32x32) inputs.
    """

    def __init__(self, block, layers, num_classes=10, stem_stride=2, stem_padding=3):
        super().__init__()
        # Channel count fed to the next block; updated by _make_layer.
        self.in_channels = 64

        # Stem: convolution, BatchNorm and ReLU
        self.conv1 = nn.Conv2d(
            3,
            64,
            kernel_size=3,
            stride=stem_stride,
            padding=stem_padding,
            bias=False,
        )
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        # Residual stages; every stage after the first halves the resolution.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # Classification head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride=1):
        """Build one residual stage as an ``nn.Sequential`` of ``blocks`` blocks.

        Only the first block receives ``stride`` and, when the shortcut shape
        would otherwise differ from the main branch, a 1x1 conv + BatchNorm
        projection as ``downsample``. ``self.in_channels`` is updated to the
        stage's output channel count.
        """
        downsample = None
        if stride != 1 or self.in_channels != out_channels * block.expansion:
            # Projection shortcut so the residual sum has matching shapes.
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * block.expansion)
            )

        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.in_channels, out_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the stem, the four stages, global average pooling and the linear head."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        # Collapse every dimension after the batch dimension before the linear layer.
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x
    
def ResNet18(num_classes=1000):
    """Build a ResNet from ``BasicBlock`` with two blocks in each of the four stages.

    Args:
        num_classes: Number of outputs of the final linear layer. The default
            here is 1000, whereas ``ResNet`` itself defaults to 10; pass the
            value explicitly for the dataset at hand.

    Returns:
        The constructed ``ResNet`` instance.
    """
    blocks_per_stage = [2, 2, 2, 2]
    return ResNet(BasicBlock, blocks_per_stage, num_classes=num_classes)

In [5]:
# Build the 10-class network and print its layer structure
num_classes = 10
net = ResNet18(num_classes=num_classes)
print(net)

def count_parameters(model):
    """Return the total number of elements over the model's trainable parameters.

    Parameters whose ``requires_grad`` flag is false are not counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total
print("Params: ", count_parameters(net))

####################################################################
# Training settings
####################################################################

# Training hyperparameters
epochs = 200
criterion = nn.CrossEntropyLoss()

# SGD with momentum and a very light L2 penalty.
optimizer = optim.SGD(
    net.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=1e-6,
)

# Scale the learning rate by 0.25 when the monitored quantity plateaus
# (patience of 10 epochs), with a floor of 1e-5. Which quantity is monitored
# is decided by the trainer's call to lr_scheduler.step (not visible here).
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    factor=0.25,
    patience=10,
    min_lr=0.00001,
)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, e

In [6]:
# Hand the model, both data loaders, the optimisation objects and the schedule
# to the project trainer (CIFAR10_trainer comes from main.py).
trainer = CIFAR10_trainer(
    net,
    train_dataloader,
    test_dataloader,
    optimizer,
    criterion,
    epochs,
    lr_scheduler,
    batch_size=batch_size,
    early_stopping=20,
)

In [7]:
# Launch training; the per-epoch train/test loss and error are reported in the
# cell output, ending with the early-stopping message and best test error.
trainer.train()


---- Start Training ----


Epoch 0: 100%|██████████| 334/334 [01:03<00:00,  5.27batch/s]
Test 0: 100%|██████████| 67/67 [00:04<00:00, 15.73batch/s]


[Epoch 1] Train Loss: 1.724498 - Test Loss: 3.012578 - Train Error: 63.46% - Test Error: 73.80%


Epoch 1: 100%|██████████| 334/334 [01:04<00:00,  5.16batch/s]
Test 1: 100%|██████████| 67/67 [00:04<00:00, 15.47batch/s]


[Epoch 2] Train Loss: 1.383172 - Test Loss: 1.326653 - Train Error: 49.78% - Test Error: 44.42%


Epoch 2: 100%|██████████| 334/334 [01:06<00:00,  5.05batch/s]
Test 2: 100%|██████████| 67/67 [00:04<00:00, 14.90batch/s]

[Epoch 3] Train Loss: 1.183185 - Test Loss: 1.997794 - Train Error: 42.24% - Test Error: 56.41%



Epoch 3: 100%|██████████| 334/334 [01:06<00:00,  4.99batch/s]
Test 3: 100%|██████████| 67/67 [00:04<00:00, 15.35batch/s]


[Epoch 4] Train Loss: 1.049409 - Test Loss: 0.963046 - Train Error: 37.30% - Test Error: 34.12%


Epoch 4: 100%|██████████| 334/334 [01:06<00:00,  5.04batch/s]
Test 4: 100%|██████████| 67/67 [00:04<00:00, 15.38batch/s]


[Epoch 5] Train Loss: 0.951807 - Test Loss: 0.896731 - Train Error: 33.34% - Test Error: 29.81%


Epoch 5: 100%|██████████| 334/334 [01:06<00:00,  5.02batch/s]
Test 5: 100%|██████████| 67/67 [00:04<00:00, 14.89batch/s]

[Epoch 6] Train Loss: 0.881930 - Test Loss: 1.467784 - Train Error: 31.05% - Test Error: 42.98%



Epoch 6: 100%|██████████| 334/334 [01:06<00:00,  5.01batch/s]
Test 6: 100%|██████████| 67/67 [00:04<00:00, 15.24batch/s]

[Epoch 7] Train Loss: 0.825361 - Test Loss: 1.013445 - Train Error: 28.72% - Test Error: 33.35%



Epoch 7: 100%|██████████| 334/334 [01:06<00:00,  5.01batch/s]
Test 7: 100%|██████████| 67/67 [00:04<00:00, 15.17batch/s]


[Epoch 8] Train Loss: 0.782624 - Test Loss: 0.677843 - Train Error: 27.27% - Test Error: 23.30%


Epoch 8: 100%|██████████| 334/334 [01:06<00:00,  5.01batch/s]
Test 8: 100%|██████████| 67/67 [00:04<00:00, 15.03batch/s]

[Epoch 9] Train Loss: 0.739377 - Test Loss: 1.114425 - Train Error: 25.83% - Test Error: 32.84%



Epoch 9: 100%|██████████| 334/334 [01:06<00:00,  4.99batch/s]
Test 9: 100%|██████████| 67/67 [00:04<00:00, 15.33batch/s]


[Epoch 10] Train Loss: 0.711851 - Test Loss: 0.696568 - Train Error: 24.81% - Test Error: 23.24%


Epoch 10: 100%|██████████| 334/334 [01:06<00:00,  4.99batch/s]
Test 10: 100%|██████████| 67/67 [00:04<00:00, 15.36batch/s]

[Epoch 11] Train Loss: 0.677055 - Test Loss: 1.021256 - Train Error: 23.38% - Test Error: 30.54%



Epoch 11: 100%|██████████| 334/334 [01:06<00:00,  5.00batch/s]
Test 11: 100%|██████████| 67/67 [00:04<00:00, 15.21batch/s]

[Epoch 12] Train Loss: 0.656731 - Test Loss: 0.745174 - Train Error: 22.77% - Test Error: 24.49%



Epoch 12: 100%|██████████| 334/334 [01:06<00:00,  4.99batch/s]
Test 12: 100%|██████████| 67/67 [00:04<00:00, 15.05batch/s]


[Epoch 13] Train Loss: 0.631931 - Test Loss: 0.585495 - Train Error: 22.15% - Test Error: 19.75%


Epoch 13: 100%|██████████| 334/334 [01:06<00:00,  4.99batch/s]
Test 13: 100%|██████████| 67/67 [00:04<00:00, 15.29batch/s]

[Epoch 14] Train Loss: 0.609935 - Test Loss: 0.651750 - Train Error: 21.22% - Test Error: 20.80%



Epoch 14: 100%|██████████| 334/334 [01:06<00:00,  4.99batch/s]
Test 14: 100%|██████████| 67/67 [00:04<00:00, 15.27batch/s]

[Epoch 15] Train Loss: 0.592750 - Test Loss: 0.598552 - Train Error: 20.70% - Test Error: 20.33%



Epoch 15: 100%|██████████| 334/334 [01:06<00:00,  4.99batch/s]
Test 15: 100%|██████████| 67/67 [00:04<00:00, 15.15batch/s]

[Epoch 16] Train Loss: 0.574211 - Test Loss: 0.838935 - Train Error: 19.76% - Test Error: 26.34%



Epoch 16: 100%|██████████| 334/334 [01:06<00:00,  4.99batch/s]
Test 16: 100%|██████████| 67/67 [00:04<00:00, 14.93batch/s]


[Epoch 17] Train Loss: 0.552637 - Test Loss: 0.560606 - Train Error: 19.14% - Test Error: 18.78%


Epoch 17: 100%|██████████| 334/334 [01:06<00:00,  4.99batch/s]
Test 17: 100%|██████████| 67/67 [00:04<00:00, 15.17batch/s]

[Epoch 18] Train Loss: 0.546263 - Test Loss: 0.520664 - Train Error: 19.11% - Test Error: 16.88%



Epoch 18: 100%|██████████| 334/334 [01:06<00:00,  4.99batch/s]
Test 18: 100%|██████████| 67/67 [00:04<00:00, 15.19batch/s]

[Epoch 19] Train Loss: 0.522007 - Test Loss: 0.559682 - Train Error: 18.19% - Test Error: 18.48%



Epoch 19: 100%|██████████| 334/334 [01:06<00:00,  4.99batch/s]
Test 19: 100%|██████████| 67/67 [00:04<00:00, 15.24batch/s]

[Epoch 20] Train Loss: 0.511122 - Test Loss: 0.499708 - Train Error: 17.88% - Test Error: 17.35%



Epoch 20: 100%|██████████| 334/334 [01:06<00:00,  4.99batch/s]
Test 20: 100%|██████████| 67/67 [00:04<00:00, 15.15batch/s]


[Epoch 21] Train Loss: 0.500394 - Test Loss: 0.478131 - Train Error: 17.53% - Test Error: 16.18%


Epoch 21: 100%|██████████| 334/334 [01:07<00:00,  4.96batch/s]
Test 21: 100%|██████████| 67/67 [00:04<00:00, 14.78batch/s]

[Epoch 22] Train Loss: 0.488861 - Test Loss: 0.542337 - Train Error: 17.06% - Test Error: 17.99%



Epoch 22: 100%|██████████| 334/334 [01:07<00:00,  4.98batch/s]
Test 22: 100%|██████████| 67/67 [00:04<00:00, 15.29batch/s]


[Epoch 23] Train Loss: 0.475178 - Test Loss: 0.450082 - Train Error: 16.68% - Test Error: 15.02%


Epoch 23: 100%|██████████| 334/334 [01:06<00:00,  4.99batch/s]
Test 23: 100%|██████████| 67/67 [00:04<00:00, 15.33batch/s]

[Epoch 24] Train Loss: 0.465624 - Test Loss: 0.634600 - Train Error: 16.32% - Test Error: 19.76%



Epoch 24: 100%|██████████| 334/334 [01:07<00:00,  4.98batch/s]
Test 24: 100%|██████████| 67/67 [00:04<00:00, 15.25batch/s]

[Epoch 25] Train Loss: 0.458049 - Test Loss: 0.500166 - Train Error: 15.96% - Test Error: 16.22%



Epoch 25: 100%|██████████| 334/334 [01:06<00:00,  5.01batch/s]
Test 25: 100%|██████████| 67/67 [00:04<00:00, 15.26batch/s]


[Epoch 26] Train Loss: 0.444751 - Test Loss: 0.413078 - Train Error: 15.64% - Test Error: 14.13%


Epoch 26: 100%|██████████| 334/334 [01:06<00:00,  5.02batch/s]
Test 26: 100%|██████████| 67/67 [00:04<00:00, 15.30batch/s]

[Epoch 27] Train Loss: 0.444684 - Test Loss: 0.554682 - Train Error: 15.53% - Test Error: 17.48%



Epoch 27: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 27: 100%|██████████| 67/67 [00:04<00:00, 15.19batch/s]

[Epoch 28] Train Loss: 0.431377 - Test Loss: 0.439078 - Train Error: 15.13% - Test Error: 14.27%



Epoch 28: 100%|██████████| 334/334 [01:06<00:00,  5.04batch/s]
Test 28: 100%|██████████| 67/67 [00:04<00:00, 15.49batch/s]

[Epoch 29] Train Loss: 0.419025 - Test Loss: 0.445340 - Train Error: 14.65% - Test Error: 14.52%



Epoch 29: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 29: 100%|██████████| 67/67 [00:04<00:00, 15.37batch/s]

[Epoch 30] Train Loss: 0.412286 - Test Loss: 0.453037 - Train Error: 14.21% - Test Error: 15.24%



Epoch 30: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 30: 100%|██████████| 67/67 [00:04<00:00, 15.11batch/s]

[Epoch 31] Train Loss: 0.404240 - Test Loss: 0.535776 - Train Error: 14.10% - Test Error: 17.39%



Epoch 31: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 31: 100%|██████████| 67/67 [00:04<00:00, 15.39batch/s]

[Epoch 32] Train Loss: 0.395401 - Test Loss: 0.454167 - Train Error: 13.88% - Test Error: 14.82%



Epoch 32: 100%|██████████| 334/334 [01:06<00:00,  5.04batch/s]
Test 32: 100%|██████████| 67/67 [00:04<00:00, 15.41batch/s]

[Epoch 33] Train Loss: 0.387921 - Test Loss: 0.515863 - Train Error: 13.53% - Test Error: 15.98%



Epoch 33: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 33: 100%|██████████| 67/67 [00:04<00:00, 15.38batch/s]


[Epoch 34] Train Loss: 0.384514 - Test Loss: 0.440542 - Train Error: 13.50% - Test Error: 13.98%


Epoch 34: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 34: 100%|██████████| 67/67 [00:04<00:00, 15.39batch/s]


[Epoch 35] Train Loss: 0.378901 - Test Loss: 0.432513 - Train Error: 13.19% - Test Error: 13.44%


Epoch 35: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 35: 100%|██████████| 67/67 [00:04<00:00, 15.36batch/s]


[Epoch 36] Train Loss: 0.368473 - Test Loss: 0.424567 - Train Error: 12.93% - Test Error: 13.39%


Epoch 36: 100%|██████████| 334/334 [01:06<00:00,  5.04batch/s]
Test 36: 100%|██████████| 67/67 [00:04<00:00, 15.25batch/s]


[Epoch 37] Train Loss: 0.366629 - Test Loss: 0.392545 - Train Error: 13.04% - Test Error: 12.76%


Epoch 37: 100%|██████████| 334/334 [01:06<00:00,  5.02batch/s]
Test 37: 100%|██████████| 67/67 [00:04<00:00, 15.06batch/s]

[Epoch 38] Train Loss: 0.356603 - Test Loss: 0.453074 - Train Error: 12.51% - Test Error: 14.65%



Epoch 38: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 38: 100%|██████████| 67/67 [00:04<00:00, 15.24batch/s]

[Epoch 39] Train Loss: 0.348492 - Test Loss: 0.406650 - Train Error: 12.28% - Test Error: 13.17%



Epoch 39: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 39: 100%|██████████| 67/67 [00:04<00:00, 15.45batch/s]


[Epoch 40] Train Loss: 0.347468 - Test Loss: 0.373913 - Train Error: 12.06% - Test Error: 11.95%


Epoch 40: 100%|██████████| 334/334 [01:06<00:00,  5.04batch/s]
Test 40: 100%|██████████| 67/67 [00:04<00:00, 15.32batch/s]

[Epoch 41] Train Loss: 0.339672 - Test Loss: 0.482825 - Train Error: 11.87% - Test Error: 15.24%



Epoch 41: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 41: 100%|██████████| 67/67 [00:04<00:00, 15.35batch/s]

[Epoch 42] Train Loss: 0.336426 - Test Loss: 0.399292 - Train Error: 11.89% - Test Error: 12.68%



Epoch 42: 100%|██████████| 334/334 [01:06<00:00,  5.01batch/s]
Test 42: 100%|██████████| 67/67 [00:04<00:00, 15.16batch/s]

[Epoch 43] Train Loss: 0.333426 - Test Loss: 0.493683 - Train Error: 11.64% - Test Error: 14.86%



Epoch 43: 100%|██████████| 334/334 [01:06<00:00,  5.01batch/s]
Test 43: 100%|██████████| 67/67 [00:04<00:00, 15.40batch/s]

[Epoch 44] Train Loss: 0.323578 - Test Loss: 0.375791 - Train Error: 11.34% - Test Error: 12.11%



Epoch 44: 100%|██████████| 334/334 [01:06<00:00,  5.02batch/s]
Test 44: 100%|██████████| 67/67 [00:04<00:00, 15.34batch/s]

[Epoch 45] Train Loss: 0.318939 - Test Loss: 0.397557 - Train Error: 11.15% - Test Error: 12.15%



Epoch 45: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 45: 100%|██████████| 67/67 [00:04<00:00, 15.37batch/s]

[Epoch 46] Train Loss: 0.314233 - Test Loss: 0.414144 - Train Error: 11.09% - Test Error: 12.87%



Epoch 46: 100%|██████████| 334/334 [01:06<00:00,  5.02batch/s]
Test 46: 100%|██████████| 67/67 [00:04<00:00, 15.13batch/s]

[Epoch 47] Train Loss: 0.307554 - Test Loss: 0.404263 - Train Error: 10.73% - Test Error: 12.59%



Epoch 47: 100%|██████████| 334/334 [01:06<00:00,  5.01batch/s]
Test 47: 100%|██████████| 67/67 [00:04<00:00, 15.27batch/s]

[Epoch 48] Train Loss: 0.304466 - Test Loss: 0.384537 - Train Error: 10.64% - Test Error: 12.30%



Epoch 48: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 48: 100%|██████████| 67/67 [00:04<00:00, 15.36batch/s]


[Epoch 49] Train Loss: 0.302718 - Test Loss: 0.366724 - Train Error: 10.54% - Test Error: 11.41%


Epoch 49: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 49: 100%|██████████| 67/67 [00:04<00:00, 15.29batch/s]

[Epoch 50] Train Loss: 0.296890 - Test Loss: 0.442184 - Train Error: 10.45% - Test Error: 12.80%



Epoch 50: 100%|██████████| 334/334 [01:06<00:00,  5.02batch/s]
Test 50: 100%|██████████| 67/67 [00:04<00:00, 15.12batch/s]

[Epoch 51] Train Loss: 0.293787 - Test Loss: 0.386530 - Train Error: 10.49% - Test Error: 11.89%



Epoch 51: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 51: 100%|██████████| 67/67 [00:04<00:00, 15.04batch/s]

[Epoch 52] Train Loss: 0.291643 - Test Loss: 0.383505 - Train Error: 10.23% - Test Error: 11.65%



Epoch 52: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 52: 100%|██████████| 67/67 [00:04<00:00, 15.38batch/s]


[Epoch 53] Train Loss: 0.282378 - Test Loss: 0.360061 - Train Error: 10.02% - Test Error: 11.02%


Epoch 53: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 53: 100%|██████████| 67/67 [00:04<00:00, 15.08batch/s]

[Epoch 54] Train Loss: 0.284539 - Test Loss: 0.435611 - Train Error: 10.02% - Test Error: 13.52%



Epoch 54: 100%|██████████| 334/334 [01:06<00:00,  5.02batch/s]
Test 54: 100%|██████████| 67/67 [00:04<00:00, 15.39batch/s]

[Epoch 55] Train Loss: 0.278305 - Test Loss: 0.366818 - Train Error: 9.91% - Test Error: 11.17%



Epoch 55: 100%|██████████| 334/334 [01:06<00:00,  5.01batch/s]
Test 55: 100%|██████████| 67/67 [00:04<00:00, 15.33batch/s]

[Epoch 56] Train Loss: 0.270197 - Test Loss: 0.374808 - Train Error: 9.63% - Test Error: 11.11%



Epoch 56: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 56: 100%|██████████| 67/67 [00:04<00:00, 15.31batch/s]


[Epoch 57] Train Loss: 0.263140 - Test Loss: 0.371201 - Train Error: 9.33% - Test Error: 10.90%


Epoch 57: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 57: 100%|██████████| 67/67 [00:04<00:00, 15.21batch/s]

[Epoch 58] Train Loss: 0.264519 - Test Loss: 0.401777 - Train Error: 9.33% - Test Error: 11.77%



Epoch 58: 100%|██████████| 334/334 [01:06<00:00,  5.04batch/s]
Test 58: 100%|██████████| 67/67 [00:04<00:00, 15.19batch/s]

[Epoch 59] Train Loss: 0.264576 - Test Loss: 0.372934 - Train Error: 9.30% - Test Error: 11.21%



Epoch 59: 100%|██████████| 334/334 [01:06<00:00,  5.02batch/s]
Test 59: 100%|██████████| 67/67 [00:04<00:00, 15.31batch/s]

[Epoch 60] Train Loss: 0.261998 - Test Loss: 0.382229 - Train Error: 9.28% - Test Error: 11.10%



Epoch 60: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 60: 100%|██████████| 67/67 [00:04<00:00, 15.40batch/s]

[Epoch 61] Train Loss: 0.255148 - Test Loss: 0.385610 - Train Error: 8.99% - Test Error: 11.23%



Epoch 61: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 61: 100%|██████████| 67/67 [00:04<00:00, 15.36batch/s]

[Epoch 62] Train Loss: 0.250160 - Test Loss: 0.383428 - Train Error: 8.77% - Test Error: 11.23%



Epoch 62: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 62: 100%|██████████| 67/67 [00:04<00:00, 14.95batch/s]

[Epoch 63] Train Loss: 0.254308 - Test Loss: 0.407102 - Train Error: 9.10% - Test Error: 11.68%



Epoch 63: 100%|██████████| 334/334 [01:06<00:00,  5.02batch/s]
Test 63: 100%|██████████| 67/67 [00:04<00:00, 15.31batch/s]

[Epoch 64] Train Loss: 0.243577 - Test Loss: 0.433841 - Train Error: 8.68% - Test Error: 12.33%



Epoch 64: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 64: 100%|██████████| 67/67 [00:04<00:00, 15.25batch/s]


[Epoch 65] Train Loss: 0.194896 - Test Loss: 0.322047 - Train Error: 6.96% - Test Error: 9.58%


Epoch 65: 100%|██████████| 334/334 [01:06<00:00,  5.04batch/s]
Test 65: 100%|██████████| 67/67 [00:04<00:00, 15.37batch/s]


[Epoch 66] Train Loss: 0.179331 - Test Loss: 0.323755 - Train Error: 6.39% - Test Error: 9.31%


Epoch 66: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 66: 100%|██████████| 67/67 [00:04<00:00, 15.13batch/s]


[Epoch 67] Train Loss: 0.173396 - Test Loss: 0.324423 - Train Error: 6.04% - Test Error: 9.24%


Epoch 67: 100%|██████████| 334/334 [01:07<00:00,  4.92batch/s]
Test 67: 100%|██████████| 67/67 [00:04<00:00, 14.46batch/s]

[Epoch 68] Train Loss: 0.169840 - Test Loss: 0.330572 - Train Error: 5.92% - Test Error: 9.35%



Epoch 68: 100%|██████████| 334/334 [01:10<00:00,  4.74batch/s]
Test 68: 100%|██████████| 67/67 [00:04<00:00, 14.46batch/s]

[Epoch 69] Train Loss: 0.168209 - Test Loss: 0.330092 - Train Error: 5.89% - Test Error: 9.32%



Epoch 69: 100%|██████████| 334/334 [01:09<00:00,  4.78batch/s]
Test 69: 100%|██████████| 67/67 [00:04<00:00, 14.70batch/s]


[Epoch 70] Train Loss: 0.161951 - Test Loss: 0.333407 - Train Error: 5.76% - Test Error: 9.23%


Epoch 70: 100%|██████████| 334/334 [01:10<00:00,  4.73batch/s]
Test 70: 100%|██████████| 67/67 [00:04<00:00, 14.07batch/s]

[Epoch 71] Train Loss: 0.159023 - Test Loss: 0.327298 - Train Error: 5.56% - Test Error: 9.29%



Epoch 71: 100%|██████████| 334/334 [01:10<00:00,  4.71batch/s]
Test 71: 100%|██████████| 67/67 [00:04<00:00, 14.40batch/s]


[Epoch 72] Train Loss: 0.161990 - Test Loss: 0.333553 - Train Error: 5.67% - Test Error: 9.02%


Epoch 72: 100%|██████████| 334/334 [01:09<00:00,  4.79batch/s]
Test 72: 100%|██████████| 67/67 [00:04<00:00, 14.62batch/s]

[Epoch 73] Train Loss: 0.159429 - Test Loss: 0.340639 - Train Error: 5.67% - Test Error: 9.52%



Epoch 73: 100%|██████████| 334/334 [01:10<00:00,  4.72batch/s]
Test 73: 100%|██████████| 67/67 [00:04<00:00, 14.59batch/s]

[Epoch 74] Train Loss: 0.156847 - Test Loss: 0.347298 - Train Error: 5.50% - Test Error: 9.35%



Epoch 74: 100%|██████████| 334/334 [01:09<00:00,  4.82batch/s]
Test 74: 100%|██████████| 67/67 [00:04<00:00, 13.92batch/s]

[Epoch 75] Train Loss: 0.158517 - Test Loss: 0.337114 - Train Error: 5.65% - Test Error: 9.27%



Epoch 75: 100%|██████████| 334/334 [01:14<00:00,  4.51batch/s]
Test 75: 100%|██████████| 67/67 [00:04<00:00, 13.85batch/s]

[Epoch 76] Train Loss: 0.155079 - Test Loss: 0.337015 - Train Error: 5.49% - Test Error: 9.38%



Epoch 76: 100%|██████████| 334/334 [01:10<00:00,  4.72batch/s]
Test 76: 100%|██████████| 67/67 [00:04<00:00, 14.53batch/s]

[Epoch 77] Train Loss: 0.143107 - Test Loss: 0.324343 - Train Error: 5.05% - Test Error: 9.05%



Epoch 77: 100%|██████████| 334/334 [01:09<00:00,  4.78batch/s]
Test 77: 100%|██████████| 67/67 [00:04<00:00, 13.90batch/s]

[Epoch 78] Train Loss: 0.140757 - Test Loss: 0.323256 - Train Error: 4.99% - Test Error: 9.04%



Epoch 78: 100%|██████████| 334/334 [01:10<00:00,  4.77batch/s]
Test 78: 100%|██████████| 67/67 [00:04<00:00, 14.04batch/s]


[Epoch 79] Train Loss: 0.137890 - Test Loss: 0.325393 - Train Error: 4.81% - Test Error: 8.95%


Epoch 79: 100%|██████████| 334/334 [01:12<00:00,  4.63batch/s]
Test 79: 100%|██████████| 67/67 [00:04<00:00, 14.20batch/s]

[Epoch 80] Train Loss: 0.134378 - Test Loss: 0.323850 - Train Error: 4.79% - Test Error: 8.97%



Epoch 80: 100%|██████████| 334/334 [01:11<00:00,  4.69batch/s]
Test 80: 100%|██████████| 67/67 [00:04<00:00, 14.28batch/s]


[Epoch 81] Train Loss: 0.136160 - Test Loss: 0.325586 - Train Error: 4.75% - Test Error: 8.80%


Epoch 81: 100%|██████████| 334/334 [01:11<00:00,  4.66batch/s]
Test 81: 100%|██████████| 67/67 [00:04<00:00, 13.96batch/s]

[Epoch 82] Train Loss: 0.138337 - Test Loss: 0.324458 - Train Error: 4.82% - Test Error: 8.94%



Epoch 82: 100%|██████████| 334/334 [01:12<00:00,  4.63batch/s]
Test 82: 100%|██████████| 67/67 [00:04<00:00, 13.98batch/s]

[Epoch 83] Train Loss: 0.132218 - Test Loss: 0.326890 - Train Error: 4.65% - Test Error: 8.82%



Epoch 83: 100%|██████████| 334/334 [01:11<00:00,  4.68batch/s]
Test 83: 100%|██████████| 67/67 [00:04<00:00, 13.81batch/s]

[Epoch 84] Train Loss: 0.130886 - Test Loss: 0.327378 - Train Error: 4.54% - Test Error: 9.00%



Epoch 84: 100%|██████████| 334/334 [01:11<00:00,  4.68batch/s]
Test 84: 100%|██████████| 67/67 [00:04<00:00, 14.36batch/s]

[Epoch 85] Train Loss: 0.130065 - Test Loss: 0.328775 - Train Error: 4.55% - Test Error: 8.93%



Epoch 85: 100%|██████████| 334/334 [01:11<00:00,  4.68batch/s]
Test 85: 100%|██████████| 67/67 [00:04<00:00, 14.69batch/s]

[Epoch 86] Train Loss: 0.132765 - Test Loss: 0.327417 - Train Error: 4.58% - Test Error: 8.87%



Epoch 86: 100%|██████████| 334/334 [01:09<00:00,  4.79batch/s]
Test 86: 100%|██████████| 67/67 [00:04<00:00, 14.31batch/s]

[Epoch 87] Train Loss: 0.131354 - Test Loss: 0.331162 - Train Error: 4.61% - Test Error: 9.07%



Epoch 87: 100%|██████████| 334/334 [01:09<00:00,  4.80batch/s]
Test 87: 100%|██████████| 67/67 [00:04<00:00, 14.59batch/s]

[Epoch 88] Train Loss: 0.129426 - Test Loss: 0.327339 - Train Error: 4.51% - Test Error: 8.97%



Epoch 88: 100%|██████████| 334/334 [01:10<00:00,  4.77batch/s]
Test 88: 100%|██████████| 67/67 [00:04<00:00, 13.99batch/s]

[Epoch 89] Train Loss: 0.127318 - Test Loss: 0.328207 - Train Error: 4.46% - Test Error: 8.95%



Epoch 89: 100%|██████████| 334/334 [01:11<00:00,  4.67batch/s]
Test 89: 100%|██████████| 67/67 [00:04<00:00, 13.96batch/s]

[Epoch 90] Train Loss: 0.126049 - Test Loss: 0.329816 - Train Error: 4.41% - Test Error: 9.03%



Epoch 90: 100%|██████████| 334/334 [01:11<00:00,  4.69batch/s]
Test 90: 100%|██████████| 67/67 [00:04<00:00, 13.78batch/s]

[Epoch 91] Train Loss: 0.129580 - Test Loss: 0.331712 - Train Error: 4.62% - Test Error: 8.89%



Epoch 91: 100%|██████████| 334/334 [01:12<00:00,  4.62batch/s]
Test 91: 100%|██████████| 67/67 [00:04<00:00, 13.84batch/s]

[Epoch 92] Train Loss: 0.128295 - Test Loss: 0.330192 - Train Error: 4.52% - Test Error: 8.89%



Epoch 92: 100%|██████████| 334/334 [01:12<00:00,  4.60batch/s]
Test 92: 100%|██████████| 67/67 [00:05<00:00, 13.17batch/s]

[Epoch 93] Train Loss: 0.125844 - Test Loss: 0.330872 - Train Error: 4.44% - Test Error: 8.82%



Epoch 93: 100%|██████████| 334/334 [01:13<00:00,  4.56batch/s]
Test 93: 100%|██████████| 67/67 [00:04<00:00, 14.38batch/s]

[Epoch 94] Train Loss: 0.124895 - Test Loss: 0.326971 - Train Error: 4.31% - Test Error: 8.83%



Epoch 94: 100%|██████████| 334/334 [01:10<00:00,  4.77batch/s]
Test 94: 100%|██████████| 67/67 [00:04<00:00, 14.08batch/s]


[Epoch 95] Train Loss: 0.121233 - Test Loss: 0.329063 - Train Error: 4.24% - Test Error: 8.76%


Epoch 95: 100%|██████████| 334/334 [01:11<00:00,  4.67batch/s]
Test 95: 100%|██████████| 67/67 [00:04<00:00, 13.91batch/s]

[Epoch 96] Train Loss: 0.126531 - Test Loss: 0.328945 - Train Error: 4.47% - Test Error: 8.76%



Epoch 96: 100%|██████████| 334/334 [01:10<00:00,  4.77batch/s]
Test 96: 100%|██████████| 67/67 [00:04<00:00, 14.31batch/s]

[Epoch 97] Train Loss: 0.125782 - Test Loss: 0.332259 - Train Error: 4.44% - Test Error: 8.88%



Epoch 97: 100%|██████████| 334/334 [01:10<00:00,  4.73batch/s]
Test 97: 100%|██████████| 67/67 [00:04<00:00, 14.00batch/s]

[Epoch 98] Train Loss: 0.125563 - Test Loss: 0.328725 - Train Error: 4.47% - Test Error: 8.76%



Epoch 98: 100%|██████████| 334/334 [01:11<00:00,  4.69batch/s]
Test 98: 100%|██████████| 67/67 [00:04<00:00, 14.19batch/s]


[Epoch 99] Train Loss: 0.122530 - Test Loss: 0.330100 - Train Error: 4.32% - Test Error: 8.75%


Epoch 99: 100%|██████████| 334/334 [01:11<00:00,  4.65batch/s]
Test 99: 100%|██████████| 67/67 [00:04<00:00, 13.93batch/s]

[Epoch 100] Train Loss: 0.122680 - Test Loss: 0.328853 - Train Error: 4.36% - Test Error: 8.87%



Epoch 100: 100%|██████████| 334/334 [01:10<00:00,  4.72batch/s]
Test 100: 100%|██████████| 67/67 [00:04<00:00, 14.42batch/s]

[Epoch 101] Train Loss: 0.125384 - Test Loss: 0.328951 - Train Error: 4.40% - Test Error: 8.77%



Epoch 101: 100%|██████████| 334/334 [01:11<00:00,  4.69batch/s]
Test 101: 100%|██████████| 67/67 [00:04<00:00, 14.32batch/s]

[Epoch 102] Train Loss: 0.122335 - Test Loss: 0.327039 - Train Error: 4.41% - Test Error: 8.93%



Epoch 102: 100%|██████████| 334/334 [01:10<00:00,  4.73batch/s]
Test 102: 100%|██████████| 67/67 [00:04<00:00, 14.40batch/s]


[Epoch 103] Train Loss: 0.126589 - Test Loss: 0.328469 - Train Error: 4.39% - Test Error: 8.72%


Epoch 103: 100%|██████████| 334/334 [01:11<00:00,  4.66batch/s]
Test 103: 100%|██████████| 67/67 [00:04<00:00, 14.54batch/s]

[Epoch 104] Train Loss: 0.124851 - Test Loss: 0.329929 - Train Error: 4.36% - Test Error: 8.86%



Epoch 104: 100%|██████████| 334/334 [01:09<00:00,  4.78batch/s]
Test 104: 100%|██████████| 67/67 [00:04<00:00, 13.92batch/s]

[Epoch 105] Train Loss: 0.126009 - Test Loss: 0.330506 - Train Error: 4.44% - Test Error: 8.81%



Epoch 105: 100%|██████████| 334/334 [01:08<00:00,  4.85batch/s]
Test 105: 100%|██████████| 67/67 [00:04<00:00, 14.76batch/s]

[Epoch 106] Train Loss: 0.123394 - Test Loss: 0.333032 - Train Error: 4.27% - Test Error: 8.84%



Epoch 106: 100%|██████████| 334/334 [01:09<00:00,  4.80batch/s]
Test 106: 100%|██████████| 67/67 [00:04<00:00, 14.02batch/s]

[Epoch 107] Train Loss: 0.124139 - Test Loss: 0.329503 - Train Error: 4.39% - Test Error: 8.86%



Epoch 107: 100%|██████████| 334/334 [01:10<00:00,  4.72batch/s]
Test 107: 100%|██████████| 67/67 [00:04<00:00, 14.45batch/s]

[Epoch 108] Train Loss: 0.124470 - Test Loss: 0.330294 - Train Error: 4.35% - Test Error: 8.73%



Epoch 108: 100%|██████████| 334/334 [01:09<00:00,  4.78batch/s]
Test 108: 100%|██████████| 67/67 [00:04<00:00, 14.69batch/s]

[Epoch 109] Train Loss: 0.124848 - Test Loss: 0.330025 - Train Error: 4.41% - Test Error: 8.91%



Epoch 109: 100%|██████████| 334/334 [01:10<00:00,  4.76batch/s]
Test 109: 100%|██████████| 67/67 [00:04<00:00, 13.50batch/s]

[Epoch 110] Train Loss: 0.123619 - Test Loss: 0.327809 - Train Error: 4.29% - Test Error: 8.75%



Epoch 110: 100%|██████████| 334/334 [01:12<00:00,  4.59batch/s]
Test 110: 100%|██████████| 67/67 [00:04<00:00, 14.07batch/s]

[Epoch 111] Train Loss: 0.125828 - Test Loss: 0.327678 - Train Error: 4.36% - Test Error: 8.79%



Epoch 111: 100%|██████████| 334/334 [01:12<00:00,  4.64batch/s]
Test 111: 100%|██████████| 67/67 [00:04<00:00, 13.73batch/s]

[Epoch 112] Train Loss: 0.122472 - Test Loss: 0.327882 - Train Error: 4.37% - Test Error: 8.90%



Epoch 112: 100%|██████████| 334/334 [01:13<00:00,  4.56batch/s]
Test 112: 100%|██████████| 67/67 [00:04<00:00, 13.80batch/s]

[Epoch 113] Train Loss: 0.125953 - Test Loss: 0.329986 - Train Error: 4.40% - Test Error: 8.89%



Epoch 113: 100%|██████████| 334/334 [01:10<00:00,  4.73batch/s]
Test 113: 100%|██████████| 67/67 [00:04<00:00, 13.91batch/s]

[Epoch 114] Train Loss: 0.122286 - Test Loss: 0.330968 - Train Error: 4.24% - Test Error: 8.95%



Epoch 114: 100%|██████████| 334/334 [01:12<00:00,  4.59batch/s]
Test 114: 100%|██████████| 67/67 [00:04<00:00, 13.99batch/s]

[Epoch 115] Train Loss: 0.123957 - Test Loss: 0.328759 - Train Error: 4.40% - Test Error: 8.88%



Epoch 115: 100%|██████████| 334/334 [01:11<00:00,  4.68batch/s]
Test 115: 100%|██████████| 67/67 [00:04<00:00, 14.55batch/s]

[Epoch 116] Train Loss: 0.124033 - Test Loss: 0.328309 - Train Error: 4.26% - Test Error: 8.84%



Epoch 116: 100%|██████████| 334/334 [01:09<00:00,  4.79batch/s]
Test 116: 100%|██████████| 67/67 [00:04<00:00, 14.26batch/s]

[Epoch 117] Train Loss: 0.128945 - Test Loss: 0.329713 - Train Error: 4.51% - Test Error: 8.85%



Epoch 117: 100%|██████████| 334/334 [01:10<00:00,  4.72batch/s]
Test 117: 100%|██████████| 67/67 [00:04<00:00, 14.05batch/s]

[Epoch 118] Train Loss: 0.125733 - Test Loss: 0.331433 - Train Error: 4.44% - Test Error: 8.89%



Epoch 118: 100%|██████████| 334/334 [01:09<00:00,  4.79batch/s]
Test 118: 100%|██████████| 67/67 [00:04<00:00, 14.29batch/s]

[Epoch 119] Train Loss: 0.124695 - Test Loss: 0.326883 - Train Error: 4.41% - Test Error: 8.91%



Epoch 119: 100%|██████████| 334/334 [01:09<00:00,  4.79batch/s]
Test 119: 100%|██████████| 67/67 [00:04<00:00, 14.47batch/s]

[Epoch 120] Train Loss: 0.124181 - Test Loss: 0.327135 - Train Error: 4.48% - Test Error: 8.90%



Epoch 120: 100%|██████████| 334/334 [01:10<00:00,  4.73batch/s]
Test 120: 100%|██████████| 67/67 [00:04<00:00, 14.32batch/s]

[Epoch 121] Train Loss: 0.123391 - Test Loss: 0.329563 - Train Error: 4.24% - Test Error: 8.85%



Epoch 121: 100%|██████████| 334/334 [01:09<00:00,  4.82batch/s]
Test 121: 100%|██████████| 67/67 [00:04<00:00, 14.26batch/s]

[Epoch 122] Train Loss: 0.125323 - Test Loss: 0.327833 - Train Error: 4.29% - Test Error: 8.72%



Epoch 122: 100%|██████████| 334/334 [01:09<00:00,  4.79batch/s]
Test 122: 100%|██████████| 67/67 [00:04<00:00, 14.42batch/s]

[Epoch 123] Train Loss: 0.119232 - Test Loss: 0.329192 - Train Error: 4.12% - Test Error: 8.82%



Epoch 123: 100%|██████████| 334/334 [01:09<00:00,  4.81batch/s]
Test 123: 100%|██████████| 67/67 [00:04<00:00, 14.44batch/s]

[Epoch 124] Train Loss: 0.123683 - Test Loss: 0.328148 - Train Error: 4.28% - Test Error: 8.85%

Early Stopping at epoch  123

BEST TEST ERROR:  8.72  in epoch  102



