# LR & DA

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from main import CIFAR10_dataset, CIFAR10_trainer
import torch
import torchvision
import torch.nn as nn
from tqdm import tqdm
import multiprocessing
import torch.optim as optim
import torch.nn.functional as  F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.transforms import v2
from torch.utils.data import default_collate

In [3]:
def add_gaussian_noise(img, mean=0, std=0.1):
    noise = torch.randn(img.size()) * std + mean
    return img + noise

# Transformaciones para entrenamiento
train_transforms = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomRotation(degrees=15),
    transforms.RandomGrayscale(p=0.1),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    
    transforms.ToTensor(),
    # transforms.RandomErasing(p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3)),
    transforms.Lambda(lambda x: add_gaussian_noise(x, 0, 0.1)),
])
train_dataset = CIFAR10_dataset(partition="train", transform=train_transforms)
test_dataset = CIFAR10_dataset(partition="test")

# CutMix and MixUp
cutmix = v2.CutMix(num_classes=10)
mixup = v2.MixUp(num_classes=10)
cutmix_or_mixup = v2.RandomChoice([cutmix, mixup])

def collate_fn(batch):
    data = default_collate(batch)  # Asegura el formato (inputs, targets)
    inputs, labels= cutmix_or_mixup(data['img'], data['label']) # Aplica CutMix o MixUp
    return {"img": inputs, "label": labels}

####################################################################
# DataLoader Class
####################################################################

batch_size = 150
num_workers = multiprocessing.cpu_count()-1
print("Num workers", num_workers)
train_dataloader = DataLoader(train_dataset, batch_size, shuffle=True, num_workers=num_workers, collate_fn=collate_fn)
test_dataloader = DataLoader(test_dataset, batch_size, shuffle=False, num_workers=num_workers)


Loading CIFAR10  train  Dataset...
Files already downloaded and verified
	Total Len.:  50000 
 --------------------------------------------------

Loading CIFAR10  test  Dataset...
Files already downloaded and verified
	Total Len.:  10000 
 --------------------------------------------------
Num workers 11


In [4]:
class BasicBlock(nn.Module):
    expansion = 1  # Para ResNet18/34, el factor de expansión es 1

    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock, self).__init__()
        
        # Primera convolución
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        
        # Segunda convolución
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        self.relu = nn.ReLU(inplace=True)

        # Shortcut solo si cambia la dimensión
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        identity = x  # Shortcut

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        out += self.shortcut(identity)  # Residual connection
        out = self.relu(out)
        

        return out

class ResNet(nn.Module):
    def __init__(self, num_classes=10):
        super(ResNet, self).__init__()

        # Inicial: Convolución, BatchNorm y ReLU
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        # Bloques residuales

        self.layer1 = self._make_layer(64, 64, stride=1)
        self.layer2 = self._make_layer(64, 128, stride=2)
        self.layer3 = self._make_layer(128, 256, stride=2)
        self.layer4 = self._make_layer(256, 512, stride=2)

        # Clasificación
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * BasicBlock.expansion, num_classes)
        self.dropout = nn.Dropout(0.5)

    def _make_layer(self, in_channels, out_channels, stride):
        return nn.Sequential(
            BasicBlock(in_channels, out_channels, stride),  # Primer bloque (posible downsampling)
            BasicBlock(out_channels, out_channels)         # Segundo bloque (sin downsampling)
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.fc(x)

        return x
    

In [5]:
# Instantiating the network and printing its architecture
num_classes = 10
net = ResNet(num_classes)
print(net)

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Params: ", count_parameters(net))

####################################################################
# Training settings
####################################################################

# Training hyperparameters
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, weight_decay=1e-6, momentum=0.9)
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=10, min_lr=0.00001)
epochs = 100

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)

In [6]:
trainer = CIFAR10_trainer(net, train_dataloader, test_dataloader, optimizer,criterion, epochs, lr_scheduler, batch_size=batch_size)

In [7]:
trainer.train()


---- Start Training ----


Epoch 0:   0%|          | 0/334 [00:00<?, ?batch/s]

Epoch 0: 100%|██████████| 334/334 [01:05<00:00,  5.07batch/s]
Test 0: 100%|██████████| 67/67 [00:04<00:00, 14.92batch/s]


[Epoch 1] Train Loss: 1.955489 - Test Loss: 1.354893 - Train Error: 65.08% - Test Error: 46.42%


Epoch 1: 100%|██████████| 334/334 [01:06<00:00,  5.00batch/s]
Test 1: 100%|██████████| 67/67 [00:04<00:00, 14.85batch/s]


[Epoch 2] Train Loss: 1.723236 - Test Loss: 1.247293 - Train Error: 52.86% - Test Error: 40.28%


Epoch 2: 100%|██████████| 334/334 [01:07<00:00,  4.98batch/s]
Test 2: 100%|██████████| 67/67 [00:04<00:00, 14.93batch/s]


[Epoch 3] Train Loss: 1.632745 - Test Loss: 1.033417 - Train Error: 46.82% - Test Error: 32.20%


Epoch 3: 100%|██████████| 334/334 [01:07<00:00,  4.98batch/s]
Test 3: 100%|██████████| 67/67 [00:04<00:00, 15.01batch/s]

[Epoch 4] Train Loss: 1.538807 - Test Loss: 1.086362 - Train Error: 41.61% - Test Error: 34.86%



Epoch 4: 100%|██████████| 334/334 [01:06<00:00,  4.99batch/s]
Test 4: 100%|██████████| 67/67 [00:04<00:00, 14.97batch/s]


[Epoch 5] Train Loss: 1.447212 - Test Loss: 0.878796 - Train Error: 38.26% - Test Error: 28.60%


Epoch 5: 100%|██████████| 334/334 [01:07<00:00,  4.98batch/s]
Test 5: 100%|██████████| 67/67 [00:04<00:00, 14.71batch/s]


[Epoch 6] Train Loss: 1.405234 - Test Loss: 0.852448 - Train Error: 34.61% - Test Error: 26.56%


Epoch 6: 100%|██████████| 334/334 [01:07<00:00,  4.98batch/s]
Test 6: 100%|██████████| 67/67 [00:04<00:00, 15.10batch/s]

[Epoch 7] Train Loss: 1.383573 - Test Loss: 0.941847 - Train Error: 33.44% - Test Error: 30.20%



Epoch 7: 100%|██████████| 334/334 [01:07<00:00,  4.97batch/s]
Test 7: 100%|██████████| 67/67 [00:04<00:00, 15.04batch/s]

[Epoch 8] Train Loss: 1.368090 - Test Loss: 0.865564 - Train Error: 32.66% - Test Error: 26.83%



Epoch 8: 100%|██████████| 334/334 [01:07<00:00,  4.98batch/s]
Test 8: 100%|██████████| 67/67 [00:04<00:00, 15.10batch/s]


[Epoch 9] Train Loss: 1.344101 - Test Loss: 0.779913 - Train Error: 32.19% - Test Error: 22.81%


Epoch 9: 100%|██████████| 334/334 [01:07<00:00,  4.97batch/s]
Test 9: 100%|██████████| 67/67 [00:04<00:00, 14.90batch/s]

[Epoch 10] Train Loss: 1.283340 - Test Loss: 0.863112 - Train Error: 29.53% - Test Error: 27.06%



Epoch 10: 100%|██████████| 334/334 [01:06<00:00,  5.00batch/s]
Test 10: 100%|██████████| 67/67 [00:04<00:00, 14.94batch/s]


[Epoch 11] Train Loss: 1.285966 - Test Loss: 0.739673 - Train Error: 28.28% - Test Error: 22.28%


Epoch 11: 100%|██████████| 334/334 [01:07<00:00,  4.97batch/s]
Test 11: 100%|██████████| 67/67 [00:04<00:00, 15.10batch/s]


[Epoch 12] Train Loss: 1.211295 - Test Loss: 0.685908 - Train Error: 25.47% - Test Error: 21.38%


Epoch 12: 100%|██████████| 334/334 [01:07<00:00,  4.98batch/s]
Test 12: 100%|██████████| 67/67 [00:04<00:00, 14.67batch/s]

[Epoch 13] Train Loss: 1.219179 - Test Loss: 0.796718 - Train Error: 25.98% - Test Error: 24.55%



Epoch 13: 100%|██████████| 334/334 [01:07<00:00,  4.97batch/s]
Test 13: 100%|██████████| 67/67 [00:04<00:00, 15.04batch/s]

[Epoch 14] Train Loss: 1.234237 - Test Loss: 0.843162 - Train Error: 25.44% - Test Error: 25.93%



Epoch 14: 100%|██████████| 334/334 [01:07<00:00,  4.97batch/s]
Test 14: 100%|██████████| 67/67 [00:04<00:00, 14.61batch/s]

[Epoch 15] Train Loss: 1.228620 - Test Loss: 0.819294 - Train Error: 25.84% - Test Error: 24.53%



Epoch 15: 100%|██████████| 334/334 [01:06<00:00,  4.99batch/s]
Test 15: 100%|██████████| 67/67 [00:04<00:00, 14.84batch/s]


[Epoch 16] Train Loss: 1.196722 - Test Loss: 0.699280 - Train Error: 23.53% - Test Error: 19.66%


Epoch 16: 100%|██████████| 334/334 [01:07<00:00,  4.97batch/s]
Test 16: 100%|██████████| 67/67 [00:04<00:00, 14.73batch/s]

[Epoch 17] Train Loss: 1.198114 - Test Loss: 0.770257 - Train Error: 24.15% - Test Error: 22.65%



Epoch 17: 100%|██████████| 334/334 [01:07<00:00,  4.97batch/s]
Test 17: 100%|██████████| 67/67 [00:04<00:00, 14.88batch/s]

[Epoch 18] Train Loss: 1.175939 - Test Loss: 0.705588 - Train Error: 23.27% - Test Error: 20.53%



Epoch 18: 100%|██████████| 334/334 [01:06<00:00,  4.99batch/s]
Test 18: 100%|██████████| 67/67 [00:04<00:00, 15.01batch/s]


[Epoch 19] Train Loss: 1.158750 - Test Loss: 0.656576 - Train Error: 21.59% - Test Error: 19.02%


Epoch 19: 100%|██████████| 334/334 [01:06<00:00,  4.99batch/s]
Test 19: 100%|██████████| 67/67 [00:04<00:00, 15.04batch/s]

[Epoch 20] Train Loss: 1.148021 - Test Loss: 0.801367 - Train Error: 21.52% - Test Error: 23.03%



Epoch 20: 100%|██████████| 334/334 [01:06<00:00,  4.99batch/s]
Test 20: 100%|██████████| 67/67 [00:04<00:00, 14.92batch/s]


[Epoch 21] Train Loss: 1.090055 - Test Loss: 0.675165 - Train Error: 20.01% - Test Error: 18.99%


Epoch 21: 100%|██████████| 334/334 [01:07<00:00,  4.98batch/s]
Test 21: 100%|██████████| 67/67 [00:04<00:00, 14.98batch/s]

[Epoch 22] Train Loss: 1.108033 - Test Loss: 0.753124 - Train Error: 20.30% - Test Error: 22.81%



Epoch 22: 100%|██████████| 334/334 [01:06<00:00,  5.00batch/s]
Test 22: 100%|██████████| 67/67 [00:04<00:00, 15.05batch/s]

[Epoch 23] Train Loss: 1.119149 - Test Loss: 0.748739 - Train Error: 20.87% - Test Error: 22.10%



Epoch 23: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 23: 100%|██████████| 67/67 [00:04<00:00, 15.17batch/s]


[Epoch 24] Train Loss: 1.112974 - Test Loss: 0.660327 - Train Error: 20.39% - Test Error: 18.60%


Epoch 24: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 24: 100%|██████████| 67/67 [00:04<00:00, 15.26batch/s]

[Epoch 25] Train Loss: 1.071307 - Test Loss: 0.704438 - Train Error: 18.93% - Test Error: 20.12%



Epoch 25: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 25: 100%|██████████| 67/67 [00:04<00:00, 15.13batch/s]

[Epoch 26] Train Loss: 1.053053 - Test Loss: 0.677104 - Train Error: 18.94% - Test Error: 19.29%



Epoch 26: 100%|██████████| 334/334 [01:06<00:00,  5.04batch/s]
Test 26: 100%|██████████| 67/67 [00:04<00:00, 15.17batch/s]

[Epoch 27] Train Loss: 1.048773 - Test Loss: 0.659304 - Train Error: 17.66% - Test Error: 18.69%



Epoch 27: 100%|██████████| 334/334 [01:06<00:00,  5.04batch/s]
Test 27: 100%|██████████| 67/67 [00:04<00:00, 15.02batch/s]


[Epoch 28] Train Loss: 1.103733 - Test Loss: 0.668712 - Train Error: 19.90% - Test Error: 18.30%


Epoch 28: 100%|██████████| 334/334 [01:06<00:00,  5.04batch/s]
Test 28: 100%|██████████| 67/67 [00:04<00:00, 15.16batch/s]

[Epoch 29] Train Loss: 1.075856 - Test Loss: 0.685768 - Train Error: 18.97% - Test Error: 19.38%



Epoch 29: 100%|██████████| 334/334 [01:06<00:00,  5.04batch/s]
Test 29: 100%|██████████| 67/67 [00:04<00:00, 15.28batch/s]

[Epoch 30] Train Loss: 1.014681 - Test Loss: 0.772918 - Train Error: 17.71% - Test Error: 23.66%



Epoch 30: 100%|██████████| 334/334 [01:06<00:00,  5.04batch/s]
Test 30: 100%|██████████| 67/67 [00:04<00:00, 15.11batch/s]


[Epoch 31] Train Loss: 0.982794 - Test Loss: 0.582897 - Train Error: 15.04% - Test Error: 15.92%


Epoch 31: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 31: 100%|██████████| 67/67 [00:04<00:00, 15.29batch/s]


[Epoch 32] Train Loss: 0.979063 - Test Loss: 0.586000 - Train Error: 15.77% - Test Error: 15.73%


Epoch 32: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 32: 100%|██████████| 67/67 [00:04<00:00, 15.20batch/s]

[Epoch 33] Train Loss: 0.965369 - Test Loss: 0.575791 - Train Error: 14.75% - Test Error: 15.83%



Epoch 33: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 33: 100%|██████████| 67/67 [00:04<00:00, 15.18batch/s]


[Epoch 34] Train Loss: 0.975550 - Test Loss: 0.586707 - Train Error: 15.52% - Test Error: 15.69%


Epoch 34: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 34: 100%|██████████| 67/67 [00:04<00:00, 15.02batch/s]

[Epoch 35] Train Loss: 0.944063 - Test Loss: 0.599102 - Train Error: 13.69% - Test Error: 16.25%



Epoch 35: 100%|██████████| 334/334 [01:06<00:00,  5.04batch/s]
Test 35: 100%|██████████| 67/67 [00:04<00:00, 15.18batch/s]

[Epoch 36] Train Loss: 0.965141 - Test Loss: 0.591105 - Train Error: 14.20% - Test Error: 15.83%



Epoch 36: 100%|██████████| 334/334 [01:06<00:00,  5.05batch/s]
Test 36: 100%|██████████| 67/67 [00:04<00:00, 15.14batch/s]


[Epoch 37] Train Loss: 0.954361 - Test Loss: 0.574811 - Train Error: 15.30% - Test Error: 15.53%


Epoch 37: 100%|██████████| 334/334 [01:06<00:00,  5.05batch/s]
Test 37: 100%|██████████| 67/67 [00:04<00:00, 15.33batch/s]


[Epoch 38] Train Loss: 0.975102 - Test Loss: 0.554320 - Train Error: 14.74% - Test Error: 15.39%


Epoch 38: 100%|██████████| 334/334 [01:06<00:00,  5.05batch/s]
Test 38: 100%|██████████| 67/67 [00:04<00:00, 15.24batch/s]

[Epoch 39] Train Loss: 0.949426 - Test Loss: 0.582789 - Train Error: 14.20% - Test Error: 15.89%



Epoch 39: 100%|██████████| 334/334 [01:06<00:00,  5.03batch/s]
Test 39: 100%|██████████| 67/67 [00:04<00:00, 15.17batch/s]

[Epoch 40] Train Loss: 0.951563 - Test Loss: 0.605480 - Train Error: 14.04% - Test Error: 15.87%



Epoch 40: 100%|██████████| 334/334 [01:06<00:00,  5.04batch/s]
Test 40: 100%|██████████| 67/67 [00:04<00:00, 15.30batch/s]

[Epoch 41] Train Loss: 0.953738 - Test Loss: 0.622459 - Train Error: 14.78% - Test Error: 15.88%



Epoch 41: 100%|██████████| 334/334 [01:06<00:00,  5.06batch/s]
Test 41: 100%|██████████| 67/67 [00:04<00:00, 15.09batch/s]

[Epoch 42] Train Loss: 0.943459 - Test Loss: 0.577120 - Train Error: 13.62% - Test Error: 15.53%



Epoch 42: 100%|██████████| 334/334 [01:06<00:00,  5.05batch/s]
Test 42: 100%|██████████| 67/67 [00:04<00:00, 15.13batch/s]

[Epoch 43] Train Loss: 0.929835 - Test Loss: 0.591128 - Train Error: 13.16% - Test Error: 15.69%



Epoch 43: 100%|██████████| 334/334 [01:07<00:00,  4.97batch/s]
Test 43: 100%|██████████| 67/67 [00:04<00:00, 15.27batch/s]

[Epoch 44] Train Loss: 0.936213 - Test Loss: 0.580666 - Train Error: 13.94% - Test Error: 15.48%



Epoch 44: 100%|██████████| 334/334 [01:06<00:00,  5.05batch/s]
Test 44: 100%|██████████| 67/67 [00:04<00:00, 15.20batch/s]

[Epoch 45] Train Loss: 0.981183 - Test Loss: 0.599289 - Train Error: 15.19% - Test Error: 15.58%



Epoch 45: 100%|██████████| 334/334 [01:06<00:00,  5.06batch/s]
Test 45: 100%|██████████| 67/67 [00:04<00:00, 15.00batch/s]

[Epoch 46] Train Loss: 0.954619 - Test Loss: 0.591974 - Train Error: 14.88% - Test Error: 15.57%



Epoch 46: 100%|██████████| 334/334 [01:06<00:00,  5.05batch/s]
Test 46: 100%|██████████| 67/67 [00:04<00:00, 15.30batch/s]


[Epoch 47] Train Loss: 0.952955 - Test Loss: 0.556222 - Train Error: 14.55% - Test Error: 15.34%


Epoch 47: 100%|██████████| 334/334 [01:05<00:00,  5.06batch/s]
Test 47: 100%|██████████| 67/67 [00:04<00:00, 15.18batch/s]


[Epoch 48] Train Loss: 0.946856 - Test Loss: 0.567615 - Train Error: 14.22% - Test Error: 15.31%


Epoch 48: 100%|██████████| 334/334 [01:06<00:00,  5.06batch/s]
Test 48: 100%|██████████| 67/67 [00:04<00:00, 15.14batch/s]

[Epoch 49] Train Loss: 0.930506 - Test Loss: 0.578671 - Train Error: 13.49% - Test Error: 15.68%



Epoch 49: 100%|██████████| 334/334 [01:06<00:00,  5.05batch/s]
Test 49: 100%|██████████| 67/67 [00:04<00:00, 15.25batch/s]

[Epoch 50] Train Loss: 0.934018 - Test Loss: 0.579242 - Train Error: 13.73% - Test Error: 15.52%



Epoch 50: 100%|██████████| 334/334 [01:06<00:00,  5.06batch/s]
Test 50: 100%|██████████| 67/67 [00:04<00:00, 15.28batch/s]

[Epoch 51] Train Loss: 0.936458 - Test Loss: 0.562001 - Train Error: 14.78% - Test Error: 15.40%



Epoch 51: 100%|██████████| 334/334 [01:06<00:00,  5.06batch/s]
Test 51: 100%|██████████| 67/67 [00:04<00:00, 15.18batch/s]

[Epoch 52] Train Loss: 0.917688 - Test Loss: 0.551562 - Train Error: 13.01% - Test Error: 15.33%



Epoch 52: 100%|██████████| 334/334 [01:06<00:00,  5.04batch/s]
Test 52: 100%|██████████| 67/67 [00:04<00:00, 15.36batch/s]

[Epoch 53] Train Loss: 0.925630 - Test Loss: 0.556015 - Train Error: 12.93% - Test Error: 15.56%



Epoch 53: 100%|██████████| 334/334 [01:06<00:00,  5.06batch/s]
Test 53: 100%|██████████| 67/67 [00:04<00:00, 15.06batch/s]

[Epoch 54] Train Loss: 0.979689 - Test Loss: 0.600559 - Train Error: 14.86% - Test Error: 15.71%



Epoch 54: 100%|██████████| 334/334 [01:05<00:00,  5.06batch/s]
Test 54: 100%|██████████| 67/67 [00:04<00:00, 15.35batch/s]

[Epoch 55] Train Loss: 0.973463 - Test Loss: 0.560031 - Train Error: 14.31% - Test Error: 15.37%



Epoch 55: 100%|██████████| 334/334 [01:06<00:00,  5.05batch/s]
Test 55: 100%|██████████| 67/67 [00:04<00:00, 15.09batch/s]

[Epoch 56] Train Loss: 0.986867 - Test Loss: 0.584060 - Train Error: 14.93% - Test Error: 15.33%



Epoch 56: 100%|██████████| 334/334 [01:06<00:00,  5.04batch/s]
Test 56: 100%|██████████| 67/67 [00:04<00:00, 15.14batch/s]

[Epoch 57] Train Loss: 0.928286 - Test Loss: 0.559748 - Train Error: 13.46% - Test Error: 15.34%



Epoch 57: 100%|██████████| 334/334 [01:06<00:00,  5.04batch/s]
Test 57: 100%|██████████| 67/67 [00:04<00:00, 15.18batch/s]

[Epoch 58] Train Loss: 0.938110 - Test Loss: 0.545911 - Train Error: 14.62% - Test Error: 15.39%



Epoch 58: 100%|██████████| 334/334 [01:06<00:00,  5.06batch/s]
Test 58: 100%|██████████| 67/67 [00:04<00:00, 15.36batch/s]

[Epoch 59] Train Loss: 0.923618 - Test Loss: 0.567900 - Train Error: 10.97% - Test Error: 15.55%



Epoch 59: 100%|██████████| 334/334 [01:06<00:00,  5.06batch/s]
Test 59: 100%|██████████| 67/67 [00:04<00:00, 15.35batch/s]

[Epoch 60] Train Loss: 0.926733 - Test Loss: 0.579250 - Train Error: 12.85% - Test Error: 15.45%



Epoch 60: 100%|██████████| 334/334 [01:06<00:00,  5.05batch/s]
Test 60: 100%|██████████| 67/67 [00:04<00:00, 15.34batch/s]


[Epoch 61] Train Loss: 0.933136 - Test Loss: 0.558434 - Train Error: 12.41% - Test Error: 15.07%


Epoch 61: 100%|██████████| 334/334 [01:06<00:00,  5.04batch/s]
Test 61: 100%|██████████| 67/67 [00:04<00:00, 15.11batch/s]

[Epoch 62] Train Loss: 0.959610 - Test Loss: 0.579834 - Train Error: 13.94% - Test Error: 15.33%



Epoch 62: 100%|██████████| 334/334 [01:06<00:00,  5.05batch/s]
Test 62: 100%|██████████| 67/67 [00:04<00:00, 15.12batch/s]

[Epoch 63] Train Loss: 0.917942 - Test Loss: 0.555635 - Train Error: 13.21% - Test Error: 15.43%



Epoch 63: 100%|██████████| 334/334 [01:06<00:00,  5.06batch/s]
Test 63: 100%|██████████| 67/67 [00:04<00:00, 14.98batch/s]

[Epoch 64] Train Loss: 0.921439 - Test Loss: 0.567591 - Train Error: 12.84% - Test Error: 15.55%



Epoch 64: 100%|██████████| 334/334 [01:05<00:00,  5.07batch/s]
Test 64: 100%|██████████| 67/67 [00:04<00:00, 15.32batch/s]

[Epoch 65] Train Loss: 0.892510 - Test Loss: 0.547509 - Train Error: 12.50% - Test Error: 15.20%



Epoch 65: 100%|██████████| 334/334 [01:06<00:00,  5.05batch/s]
Test 65: 100%|██████████| 67/67 [00:04<00:00, 15.21batch/s]

[Epoch 66] Train Loss: 0.941142 - Test Loss: 0.548694 - Train Error: 14.28% - Test Error: 15.21%



Epoch 66: 100%|██████████| 334/334 [01:06<00:00,  5.05batch/s]
Test 66: 100%|██████████| 67/67 [00:04<00:00, 15.24batch/s]

[Epoch 67] Train Loss: 0.945369 - Test Loss: 0.577346 - Train Error: 13.66% - Test Error: 15.54%



Epoch 67: 100%|██████████| 334/334 [01:06<00:00,  5.06batch/s]
Test 67: 100%|██████████| 67/67 [00:04<00:00, 15.29batch/s]

[Epoch 68] Train Loss: 0.971157 - Test Loss: 0.566130 - Train Error: 15.34% - Test Error: 15.42%



Epoch 68: 100%|██████████| 334/334 [01:06<00:00,  5.04batch/s]
Test 68: 100%|██████████| 67/67 [00:04<00:00, 15.21batch/s]

[Epoch 69] Train Loss: 0.928691 - Test Loss: 0.568143 - Train Error: 13.54% - Test Error: 15.44%



Epoch 69: 100%|██████████| 334/334 [01:05<00:00,  5.06batch/s]
Test 69: 100%|██████████| 67/67 [00:04<00:00, 14.90batch/s]

[Epoch 70] Train Loss: 0.956724 - Test Loss: 0.566857 - Train Error: 14.10% - Test Error: 15.40%



Epoch 70: 100%|██████████| 334/334 [01:06<00:00,  5.05batch/s]
Test 70: 100%|██████████| 67/67 [00:04<00:00, 15.24batch/s]

[Epoch 71] Train Loss: 0.965249 - Test Loss: 0.560551 - Train Error: 14.05% - Test Error: 15.26%



Epoch 71: 100%|██████████| 334/334 [01:06<00:00,  5.06batch/s]
Test 71: 100%|██████████| 67/67 [00:04<00:00, 15.25batch/s]

[Epoch 72] Train Loss: 0.936102 - Test Loss: 0.566681 - Train Error: 13.92% - Test Error: 15.66%



Epoch 72: 100%|██████████| 334/334 [01:06<00:00,  5.06batch/s]
Test 72: 100%|██████████| 67/67 [00:04<00:00, 15.11batch/s]

[Epoch 73] Train Loss: 0.947465 - Test Loss: 0.566372 - Train Error: 13.81% - Test Error: 15.19%



Epoch 73: 100%|██████████| 334/334 [01:06<00:00,  5.05batch/s]
Test 73: 100%|██████████| 67/67 [00:04<00:00, 15.10batch/s]

[Epoch 74] Train Loss: 0.951537 - Test Loss: 0.573404 - Train Error: 13.33% - Test Error: 15.45%



Epoch 74: 100%|██████████| 334/334 [01:06<00:00,  5.06batch/s]
Test 74: 100%|██████████| 67/67 [00:04<00:00, 15.04batch/s]

[Epoch 75] Train Loss: 0.943043 - Test Loss: 0.590109 - Train Error: 13.32% - Test Error: 15.40%



Epoch 75: 100%|██████████| 334/334 [01:06<00:00,  5.06batch/s]
Test 75: 100%|██████████| 67/67 [00:04<00:00, 15.15batch/s]

[Epoch 76] Train Loss: 0.951831 - Test Loss: 0.582042 - Train Error: 13.84% - Test Error: 15.40%



Epoch 76: 100%|██████████| 334/334 [01:06<00:00,  5.06batch/s]
Test 76: 100%|██████████| 67/67 [00:04<00:00, 15.30batch/s]

[Epoch 77] Train Loss: 0.958358 - Test Loss: 0.550443 - Train Error: 13.99% - Test Error: 15.23%

Early Stopping at epoch  76

BEST TEST ERROR:  15.07  in epoch  60



