In [1]:
import torch 
import torch.nn as nn
import torch.optim as optim 
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import numpy as np
from tqdm import tqdm 

In [2]:
#Check device 
if torch.backends.mps.is_available():
    device = torch.device('mps')

In [11]:
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import ssl

def get_dataloaders(batch_size=128, data_dir='./data'):
    # Fix SSL certificate issue
    ssl._create_default_https_context = ssl._create_unverified_context
    
    # Define transforms
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
    ])
    
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
    ])
    
    # Download CIFAR-100
    trainset = datasets.CIFAR100(
        root=data_dir, train=True, download=True, transform=transform_train
    )
    testset = datasets.CIFAR100(
        root=data_dir, train=False, download=True, transform=transform_test
    )
    
    trainloader = DataLoader(
        trainset, batch_size=batch_size, shuffle=True, num_workers=2
    )
    testloader = DataLoader(
        testset, batch_size=batch_size, shuffle=False, num_workers=2
    )
    
    return trainloader, testloader

In [12]:
# Model A: VGG without BatchNorm

class VGG_NoBN(nn.Module):
    def __init__(self, num_classes = 100):
        super(VGG_NoBN, self).__init__()
        
        self.features = nn.Sequential(
            #Block 1
            nn.Conv2d(3, 64, 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.Conv2d(64, 64, 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.MaxPool2d(2, 2),

            #Block 2
            nn.Conv2d(64, 128, 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.Conv2d(128, 128, 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.MaxPool2d(2,2),

            #Block3
            nn.Conv2d(128, 256, 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.Conv2d(256, 256, 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.Conv2d(256, 256, 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.MaxPool2d(2,2),

            # Block 4
            nn.Conv2d(256, 512, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(512 * 2 * 2, 512),
            nn.ReLU(inplace = True),
            nn.Dropout(0.5),
            nn.Linear(512, 512),
            nn.ReLU(inplace = True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

        self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
    
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal(m.weight, mode = 'fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)


            


In [13]:
# Model b: VGG with BatchNorm before activation
class VGG_BN_Before(nn.Module):
    def __init__(self, num_classes=100):
        super(VGG_BN_Before, self).__init__()
        
        self.features = nn.Sequential(
            # Block 1
            nn.Conv2d(3, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            
            # Block 2
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            
            # Block 3
            nn.Conv2d(128, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            
            # Block 4
            nn.Conv2d(256, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )
        
        self.classifier = nn.Sequential(
            nn.Linear(512 * 2 * 2, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )
        
        self._initialize_weights()
    
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
    
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)


In [14]:
# Model c: VGG with BatchNorm after activation
class VGG_BN_After(nn.Module):
    def __init__(self, num_classes=100):
        super(VGG_BN_After, self).__init__()
        
        self.features = nn.Sequential(
            # Block 1
            nn.Conv2d(3, 64, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(64),
            nn.Conv2d(64, 64, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2, 2),
            
            # Block 2
            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(128),
            nn.Conv2d(128, 128, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(2, 2),
            
            # Block 3
            nn.Conv2d(128, 256, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256),
            nn.Conv2d(256, 256, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256),
            nn.Conv2d(256, 256, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256),
            nn.MaxPool2d(2, 2),
            
            # Block 4
            nn.Conv2d(256, 512, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(512),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(512),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(512),
            nn.MaxPool2d(2, 2),
        )
        
        self.classifier = nn.Sequential(
            nn.Linear(512 * 2 * 2, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )
        
        self._initialize_weights()
    
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
    
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)


In [15]:
#Training function

def train_epoch(model, trainloader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    pbar = tqdm(trainloader, desc = 'Training')
    for inputs, targets in pbar:
        inputs, targets = inputs.to(device), targets.to(device)

        #Training steps

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        #Tracking metrics
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        # Update progress bar with current loss and accuracy
        pbar.set_postfix({'loss': running_loss/len(pbar), 
                         'acc': 100.*correct/total})
    # Return average loss and accuracy for the epoch
    return running_loss / len(trainloader), 100. * correct / total
        


In [16]:
# Testing function with top-1 and top-5 accuracy
def test(model, testloader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct_top1 = 0
    correct_top5 = 0
    total = 0
    
    with torch.no_grad():
        pbar = tqdm(testloader, desc='Testing')
        for inputs, targets in pbar:
            inputs, targets = inputs.to(device), targets.to(device)
            
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            
            running_loss += loss.item()
            
            # Top-1 accuracy
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct_top1 += predicted.eq(targets).sum().item()
            
            # Top-5 accuracy
            _, top5_pred = outputs.topk(5, 1, True, True)
            top5_pred = top5_pred.t()
            correct_top5 += top5_pred.eq(targets.view(1, -1).expand_as(top5_pred)).sum().item()
            
            pbar.set_postfix({'loss': running_loss/len(pbar),
                            'top1': 100.*correct_top1/total,
                            'top5': 100.*correct_top5/total})
    
    top1_acc = 100. * correct_top1 / total
    top5_acc = 100. * correct_top5 / total
    avg_loss = running_loss / len(testloader)
    
    return avg_loss, top1_acc, top5_acc



In [17]:
# Main training function
def train_model(model, model_name, trainloader, testloader, num_epochs=30):
    print(f"\n{'='*60}")
    print(f"Training {model_name}")
    print(f"{'='*60}")
    
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    #optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30, 40], gamma=0.1)
    
    best_top1_acc = 0.0
    best_top5_acc = 0.0
    
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        print(f"Learning rate: {scheduler.get_last_lr()[0]:.6f}")
        
        train_loss, train_acc = train_epoch(model, trainloader, criterion, optimizer, device)
        test_loss, top1_acc, top5_acc = test(model, testloader, criterion, device)
        
        scheduler.step()
        
        if top1_acc > best_top1_acc:
            best_top1_acc = top1_acc
            best_top5_acc = top5_acc
        
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Test Loss: {test_loss:.4f}, Top-1 Acc: {top1_acc:.2f}%, Top-5 Acc: {top5_acc:.2f}%")
        print(f"Best Top-1: {best_top1_acc:.2f}%, Best Top-5: {best_top5_acc:.2f}%")
    
    return best_top1_acc, best_top5_acc

In [18]:
# Main execution
if __name__ == "__main__":
    # Prepare data
    trainloader, testloader = get_dataloaders(batch_size=128)
    
    # Store results
    results = {}
    
    # Train model a: VGG without BatchNorm
    print("\n" + "="*60)
    print("MODEL A: VGG WITHOUT BATCH NORMALIZATION")
    print("="*60)
    model_a = VGG_NoBN()
    top1_a, top5_a = train_model(model_a, "VGG without BatchNorm", 
                                  trainloader, testloader, num_epochs=30)
    results['No BatchNorm'] = {'top1': top1_a, 'top5': top5_a}
    
    # Train model b: VGG with BatchNorm before activation
    print("\n" + "="*60)
    print("MODEL B: VGG WITH BATCH NORMALIZATION BEFORE ACTIVATION")
    print("="*60)
    model_b = VGG_BN_Before()
    top1_b, top5_b = train_model(model_b, "VGG with BatchNorm before ReLU", 
                                  trainloader, testloader, num_epochs=30)
    results['BatchNorm Before ReLU'] = {'top1': top1_b, 'top5': top5_b}
    
    # Train model c: VGG with BatchNorm after activation
    print("\n" + "="*60)
    print("MODEL C: VGG WITH BATCH NORMALIZATION AFTER ACTIVATION")
    print("="*60)
    model_c = VGG_BN_After()
    top1_c, top5_c = train_model(model_c, "VGG with BatchNorm after ReLU", 
                                  trainloader, testloader, num_epochs=30)
    results['BatchNorm After ReLU'] = {'top1': top1_c, 'top5': top5_c}
    
    # Print final comparison
    print("\n" + "="*60)
    print("FINAL RESULTS COMPARISON")
    print("="*60)
    print(f"{'Model':<30} {'Top-1 Accuracy':<20} {'Top-5 Accuracy':<20}")
    print("-" * 70)
    for model_name, acc in results.items():
        print(f"{model_name:<30} {acc['top1']:>18.2f}% {acc['top5']:>18.2f}%")
    print("="*60)

100%|██████████| 169M/169M [00:48<00:00, 3.46MB/s] 
  nn.init.kaiming_normal(m.weight, mode = 'fan_out', nonlinearity='relu')



MODEL A: VGG WITHOUT BATCH NORMALIZATION

Training VGG without BatchNorm

Epoch 1/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:40<00:00,  9.72it/s, loss=4.61, acc=0.864]
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.70it/s, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6058, Train Acc: 0.86%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 2/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.85it/s, loss=4.61, acc=0.856]
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.73it/s, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6056, Train Acc: 0.86%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 3/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.81it/s, loss=4.61, acc=0.89] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.55it/s, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6056, Train Acc: 0.89%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 4/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.85it/s, loss=4.61, acc=0.902]
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.75it/s, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6056, Train Acc: 0.90%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 5/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.89it/s, loss=4.61, acc=0.894]
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.66it/s, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6056, Train Acc: 0.89%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 6/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.90it/s, loss=4.61, acc=0.912]
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.70it/s, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6056, Train Acc: 0.91%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 7/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.83it/s, loss=4.61, acc=0.908]
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.74it/s, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6056, Train Acc: 0.91%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 8/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.80it/s, loss=4.61, acc=0.846]
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.73it/s, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6056, Train Acc: 0.85%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 9/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.87it/s, loss=4.61, acc=0.812]
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.68it/s, loss=4.61, top1=1, top5=5]       


Train Loss: 4.6056, Train Acc: 0.81%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 10/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.88it/s, loss=4.61, acc=0.904]
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.73it/s, loss=4.61, top1=1, top5=5]       


Train Loss: 4.6056, Train Acc: 0.90%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 11/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.86it/s, loss=4.61, acc=0.862]
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.66it/s, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6056, Train Acc: 0.86%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 12/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.86it/s, loss=4.61, acc=0.864]
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.63it/s, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6057, Train Acc: 0.86%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 13/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.87it/s, loss=4.61, acc=0.866]
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.69it/s, loss=4.61, top1=1, top5=5]       


Train Loss: 4.6057, Train Acc: 0.87%
Test Loss: 4.6053, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 14/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.87it/s, loss=4.61, acc=0.908]
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.64it/s, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6056, Train Acc: 0.91%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 15/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.82it/s, loss=4.61, acc=0.91] 
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.75it/s, loss=4.61, top1=1, top5=5]       


Train Loss: 4.6056, Train Acc: 0.91%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 16/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.87it/s, loss=4.61, acc=0.82] 
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.74it/s, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6056, Train Acc: 0.82%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 17/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.86it/s, loss=4.61, acc=0.924]
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.67it/s, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6056, Train Acc: 0.92%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 18/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.84it/s, loss=4.61, acc=0.884]
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.74it/s, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6056, Train Acc: 0.88%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 19/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.86it/s, loss=4.61, acc=0.846]
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.68it/s, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6057, Train Acc: 0.85%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 20/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [04:52<00:00,  1.34it/s, loss=4.61, acc=0.894]
Testing: 100%|██████████| 79/79 [17:41<00:00, 13.44s/it, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6057, Train Acc: 0.89%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 21/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [22:45<00:00,  3.49s/it, loss=4.61, acc=0.882]
Testing: 100%|██████████| 79/79 [20:03<00:00, 15.24s/it, loss=4.61, top1=1, top5=5]             


Train Loss: 4.6056, Train Acc: 0.88%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 22/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.90it/s, loss=4.61, acc=0.866]
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.74it/s, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6056, Train Acc: 0.87%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 23/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.90it/s, loss=4.61, acc=0.92] 
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.76it/s, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6056, Train Acc: 0.92%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 24/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.86it/s, loss=4.61, acc=0.882]
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.69it/s, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6056, Train Acc: 0.88%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 25/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.89it/s, loss=4.61, acc=0.9]  
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.64it/s, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6056, Train Acc: 0.90%
Test Loss: 4.6053, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 26/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.89it/s, loss=4.61, acc=0.876]
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.74it/s, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6057, Train Acc: 0.88%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 27/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [01:43<00:00,  3.77it/s, loss=4.61, acc=0.89] 
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.66it/s, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6056, Train Acc: 0.89%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 28/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.89it/s, loss=4.61, acc=0.902]
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.76it/s, loss=4.61, top1=1, top5=5]       


Train Loss: 4.6056, Train Acc: 0.90%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 29/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:39<00:00,  9.88it/s, loss=4.61, acc=0.836]
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.65it/s, loss=4.61, top1=1, top5=5]       


Train Loss: 4.6056, Train Acc: 0.84%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

Epoch 30/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [21:33<00:00,  3.31s/it, loss=4.61, acc=0.83]   
Testing: 100%|██████████| 79/79 [49:04<00:00, 37.27s/it, loss=4.61, top1=1, top5=5]        


Train Loss: 4.6056, Train Acc: 0.83%
Test Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
Best Top-1: 1.00%, Best Top-5: 5.00%

MODEL B: VGG WITH BATCH NORMALIZATION BEFORE ACTIVATION

Training VGG with BatchNorm before ReLU

Epoch 1/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [35:51<00:00,  5.50s/it, loss=4.41, acc=2.24] 
Testing: 100%|██████████| 79/79 [31:48<00:00, 24.15s/it, loss=4.29, top1=2.93, top5=15]      


Train Loss: 4.4149, Train Acc: 2.24%
Test Loss: 4.2879, Top-1 Acc: 2.93%, Top-5 Acc: 15.03%
Best Top-1: 2.93%, Best Top-5: 15.03%

Epoch 2/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [44:30<00:00,  6.83s/it, loss=4.21, acc=3.53]   
Testing: 100%|██████████| 79/79 [46:56<00:00, 35.65s/it, loss=4.12, top1=4.66, top5=20.1]     


Train Loss: 4.2122, Train Acc: 3.53%
Test Loss: 4.1244, Top-1 Acc: 4.66%, Top-5 Acc: 20.11%
Best Top-1: 4.66%, Best Top-5: 20.11%

Epoch 3/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:44<00:00,  8.84it/s, loss=4.09, acc=4.58]
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.68it/s, loss=4.02, top1=5.46, top5=22.5] 


Train Loss: 4.0920, Train Acc: 4.58%
Test Loss: 4.0159, Top-1 Acc: 5.46%, Top-5 Acc: 22.46%
Best Top-1: 5.46%, Best Top-5: 22.46%

Epoch 4/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  9.00it/s, loss=3.99, acc=5.72]
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.64it/s, loss=3.9, top1=6.75, top5=26.2]  


Train Loss: 3.9867, Train Acc: 5.72%
Test Loss: 3.9038, Top-1 Acc: 6.75%, Top-5 Acc: 26.19%
Best Top-1: 6.75%, Best Top-5: 26.19%

Epoch 5/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.97it/s, loss=3.88, acc=7.04] 
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.66it/s, loss=3.9, top1=7.31, top5=27.5]  


Train Loss: 3.8775, Train Acc: 7.04%
Test Loss: 3.9001, Top-1 Acc: 7.31%, Top-5 Acc: 27.47%
Best Top-1: 7.31%, Best Top-5: 27.47%

Epoch 6/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.95it/s, loss=3.7, acc=9.23]  
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.64it/s, loss=3.55, top1=11.2, top5=37]   


Train Loss: 3.7050, Train Acc: 9.23%
Test Loss: 3.5543, Top-1 Acc: 11.16%, Top-5 Acc: 36.97%
Best Top-1: 11.16%, Best Top-5: 36.97%

Epoch 7/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.96it/s, loss=3.51, acc=11.7] 
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.66it/s, loss=3.3, top1=15, top5=44.9]    


Train Loss: 3.5113, Train Acc: 11.68%
Test Loss: 3.3043, Top-1 Acc: 14.97%, Top-5 Acc: 44.93%
Best Top-1: 14.97%, Best Top-5: 44.93%

Epoch 8/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.92it/s, loss=3.34, acc=14.5] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.56it/s, loss=3.11, top1=18.1, top5=51]   


Train Loss: 3.3365, Train Acc: 14.52%
Test Loss: 3.1143, Top-1 Acc: 18.12%, Top-5 Acc: 51.02%
Best Top-1: 18.12%, Best Top-5: 51.02%

Epoch 9/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.91it/s, loss=3.17, acc=17.3] 
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.67it/s, loss=3, top1=20.6, top5=53.4]    


Train Loss: 3.1696, Train Acc: 17.29%
Test Loss: 2.9986, Top-1 Acc: 20.57%, Top-5 Acc: 53.38%
Best Top-1: 20.57%, Best Top-5: 53.38%

Epoch 10/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.97it/s, loss=3.02, acc=20.2] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.51it/s, loss=2.82, top1=24, top5=59.1]   


Train Loss: 3.0233, Train Acc: 20.21%
Test Loss: 2.8211, Top-1 Acc: 24.01%, Top-5 Acc: 59.14%
Best Top-1: 24.01%, Best Top-5: 59.14%

Epoch 11/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  9.01it/s, loss=2.89, acc=22.5] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.61it/s, loss=2.77, top1=24.6, top5=60.5] 


Train Loss: 2.8939, Train Acc: 22.54%
Test Loss: 2.7652, Top-1 Acc: 24.55%, Top-5 Acc: 60.47%
Best Top-1: 24.55%, Best Top-5: 60.47%

Epoch 12/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.96it/s, loss=2.78, acc=24.7] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.57it/s, loss=2.64, top1=28.3, top5=63.2] 


Train Loss: 2.7832, Train Acc: 24.74%
Test Loss: 2.6393, Top-1 Acc: 28.28%, Top-5 Acc: 63.18%
Best Top-1: 28.28%, Best Top-5: 63.18%

Epoch 13/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.94it/s, loss=2.7, acc=26.7]  
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.63it/s, loss=2.61, top1=28.9, top5=64.4] 


Train Loss: 2.6984, Train Acc: 26.66%
Test Loss: 2.6074, Top-1 Acc: 28.92%, Top-5 Acc: 64.42%
Best Top-1: 28.92%, Best Top-5: 64.42%

Epoch 14/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  9.06it/s, loss=2.6, acc=29.3]  
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.69it/s, loss=2.55, top1=31.2, top5=65.3] 


Train Loss: 2.5966, Train Acc: 29.33%
Test Loss: 2.5540, Top-1 Acc: 31.21%, Top-5 Acc: 65.35%
Best Top-1: 31.21%, Best Top-5: 65.35%

Epoch 15/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  9.06it/s, loss=2.52, acc=31.2] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.55it/s, loss=2.53, top1=32, top5=66.7]   


Train Loss: 2.5167, Train Acc: 31.24%
Test Loss: 2.5340, Top-1 Acc: 31.97%, Top-5 Acc: 66.71%
Best Top-1: 31.97%, Best Top-5: 66.71%

Epoch 16/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.94it/s, loss=2.45, acc=32.4] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.58it/s, loss=2.61, top1=31.8, top5=65.3] 


Train Loss: 2.4545, Train Acc: 32.42%
Test Loss: 2.6058, Top-1 Acc: 31.75%, Top-5 Acc: 65.27%
Best Top-1: 31.97%, Best Top-5: 66.71%

Epoch 17/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.94it/s, loss=2.38, acc=34.4] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.57it/s, loss=2.26, top1=37.9, top5=72.5] 


Train Loss: 2.3801, Train Acc: 34.43%
Test Loss: 2.2551, Top-1 Acc: 37.91%, Top-5 Acc: 72.52%
Best Top-1: 37.91%, Best Top-5: 72.52%

Epoch 18/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.93it/s, loss=2.33, acc=35.9] 
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.65it/s, loss=2.28, top1=37.4, top5=71.7] 


Train Loss: 2.3307, Train Acc: 35.85%
Test Loss: 2.2845, Top-1 Acc: 37.43%, Top-5 Acc: 71.66%
Best Top-1: 37.91%, Best Top-5: 72.52%

Epoch 19/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.94it/s, loss=2.27, acc=37.4] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.57it/s, loss=2.22, top1=38.8, top5=72.5] 


Train Loss: 2.2665, Train Acc: 37.43%
Test Loss: 2.2246, Top-1 Acc: 38.79%, Top-5 Acc: 72.47%
Best Top-1: 38.79%, Best Top-5: 72.47%

Epoch 20/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.90it/s, loss=2.22, acc=38.4] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.58it/s, loss=2.1, top1=41.8, top5=75.2]  


Train Loss: 2.2201, Train Acc: 38.42%
Test Loss: 2.0967, Top-1 Acc: 41.81%, Top-5 Acc: 75.23%
Best Top-1: 41.81%, Best Top-5: 75.23%

Epoch 21/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.96it/s, loss=2.17, acc=40]   
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.67it/s, loss=2.15, top1=40.9, top5=74.1] 


Train Loss: 2.1706, Train Acc: 39.98%
Test Loss: 2.1533, Top-1 Acc: 40.90%, Top-5 Acc: 74.08%
Best Top-1: 41.81%, Best Top-5: 75.23%

Epoch 22/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.93it/s, loss=2.12, acc=41.2] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.62it/s, loss=2.09, top1=42.4, top5=75.7] 


Train Loss: 2.1238, Train Acc: 41.16%
Test Loss: 2.0888, Top-1 Acc: 42.42%, Top-5 Acc: 75.67%
Best Top-1: 42.42%, Best Top-5: 75.67%

Epoch 23/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  9.00it/s, loss=2.08, acc=41.9] 
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.71it/s, loss=2.08, top1=43.2, top5=75.6] 


Train Loss: 2.0823, Train Acc: 41.93%
Test Loss: 2.0811, Top-1 Acc: 43.16%, Top-5 Acc: 75.62%
Best Top-1: 43.16%, Best Top-5: 75.62%

Epoch 24/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.97it/s, loss=2.05, acc=43]   
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.56it/s, loss=1.97, top1=45.5, top5=78]   


Train Loss: 2.0482, Train Acc: 42.98%
Test Loss: 1.9660, Top-1 Acc: 45.53%, Top-5 Acc: 78.00%
Best Top-1: 45.53%, Best Top-5: 78.00%

Epoch 25/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.95it/s, loss=2.01, acc=44.4] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.58it/s, loss=1.97, top1=45.5, top5=78.2] 


Train Loss: 2.0071, Train Acc: 44.42%
Test Loss: 1.9721, Top-1 Acc: 45.50%, Top-5 Acc: 78.17%
Best Top-1: 45.53%, Best Top-5: 78.00%

Epoch 26/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.97it/s, loss=1.97, acc=45.3] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.56it/s, loss=2.12, top1=44.1, top5=75.8] 


Train Loss: 1.9681, Train Acc: 45.26%
Test Loss: 2.1166, Top-1 Acc: 44.09%, Top-5 Acc: 75.82%
Best Top-1: 45.53%, Best Top-5: 78.00%

Epoch 27/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.95it/s, loss=1.95, acc=45.9] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.50it/s, loss=1.96, top1=46.4, top5=78.3] 


Train Loss: 1.9453, Train Acc: 45.91%
Test Loss: 1.9585, Top-1 Acc: 46.35%, Top-5 Acc: 78.26%
Best Top-1: 46.35%, Best Top-5: 78.26%

Epoch 28/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.96it/s, loss=1.91, acc=46.8] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.53it/s, loss=1.86, top1=48.1, top5=79.6] 


Train Loss: 1.9138, Train Acc: 46.75%
Test Loss: 1.8633, Top-1 Acc: 48.06%, Top-5 Acc: 79.57%
Best Top-1: 48.06%, Best Top-5: 79.57%

Epoch 29/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.97it/s, loss=1.89, acc=47]   
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.55it/s, loss=1.87, top1=48.2, top5=79.9] 


Train Loss: 1.8947, Train Acc: 47.02%
Test Loss: 1.8663, Top-1 Acc: 48.16%, Top-5 Acc: 79.88%
Best Top-1: 48.16%, Best Top-5: 79.88%

Epoch 30/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.91it/s, loss=1.86, acc=48.1] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.57it/s, loss=1.94, top1=47.5, top5=78.8] 


Train Loss: 1.8618, Train Acc: 48.07%
Test Loss: 1.9403, Top-1 Acc: 47.52%, Top-5 Acc: 78.77%
Best Top-1: 48.16%, Best Top-5: 79.88%

MODEL C: VGG WITH BATCH NORMALIZATION AFTER ACTIVATION

Training VGG with BatchNorm after ReLU

Epoch 1/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.95it/s, loss=4.2, acc=4.91]  
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.65it/s, loss=4.03, top1=6.45, top5=25.8] 


Train Loss: 4.1951, Train Acc: 4.91%
Test Loss: 4.0283, Top-1 Acc: 6.45%, Top-5 Acc: 25.75%
Best Top-1: 6.45%, Best Top-5: 25.75%

Epoch 2/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.95it/s, loss=3.96, acc=6.92]
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.58it/s, loss=3.74, top1=9.57, top5=33.5] 


Train Loss: 3.9581, Train Acc: 6.92%
Test Loss: 3.7353, Top-1 Acc: 9.57%, Top-5 Acc: 33.50%
Best Top-1: 9.57%, Best Top-5: 33.50%

Epoch 3/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.95it/s, loss=3.77, acc=8.74] 
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.65it/s, loss=3.6, top1=10.2, top5=36.2]  


Train Loss: 3.7750, Train Acc: 8.74%
Test Loss: 3.5999, Top-1 Acc: 10.19%, Top-5 Acc: 36.21%
Best Top-1: 10.19%, Best Top-5: 36.21%

Epoch 4/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.95it/s, loss=3.56, acc=11.6] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.53it/s, loss=3.43, top1=13.3, top5=41.6] 


Train Loss: 3.5632, Train Acc: 11.56%
Test Loss: 3.4293, Top-1 Acc: 13.29%, Top-5 Acc: 41.65%
Best Top-1: 13.29%, Best Top-5: 41.65%

Epoch 5/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.92it/s, loss=3.32, acc=15.4] 
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.66it/s, loss=3.11, top1=18.5, top5=51.6] 


Train Loss: 3.3189, Train Acc: 15.36%
Test Loss: 3.1057, Top-1 Acc: 18.48%, Top-5 Acc: 51.65%
Best Top-1: 18.48%, Best Top-5: 51.65%

Epoch 6/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.94it/s, loss=3.09, acc=19.3] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.51it/s, loss=2.82, top1=24.1, top5=59.5] 


Train Loss: 3.0908, Train Acc: 19.33%
Test Loss: 2.8191, Top-1 Acc: 24.11%, Top-5 Acc: 59.50%
Best Top-1: 24.11%, Best Top-5: 59.50%

Epoch 7/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.94it/s, loss=2.88, acc=23.8] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.63it/s, loss=2.69, top1=27.4, top5=63.2] 


Train Loss: 2.8793, Train Acc: 23.77%
Test Loss: 2.6874, Top-1 Acc: 27.43%, Top-5 Acc: 63.24%
Best Top-1: 27.43%, Best Top-5: 63.24%

Epoch 8/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.93it/s, loss=2.71, acc=27.5] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.53it/s, loss=2.47, top1=32.6, top5=68]   


Train Loss: 2.7130, Train Acc: 27.50%
Test Loss: 2.4730, Top-1 Acc: 32.56%, Top-5 Acc: 68.01%
Best Top-1: 32.56%, Best Top-5: 68.01%

Epoch 9/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.95it/s, loss=2.55, acc=31.4] 
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.66it/s, loss=2.39, top1=34.1, top5=69]   


Train Loss: 2.5531, Train Acc: 31.36%
Test Loss: 2.3931, Top-1 Acc: 34.08%, Top-5 Acc: 68.98%
Best Top-1: 34.08%, Best Top-5: 68.98%

Epoch 10/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.93it/s, loss=2.43, acc=34.4] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.55it/s, loss=2.26, top1=37.9, top5=72.2] 


Train Loss: 2.4314, Train Acc: 34.37%
Test Loss: 2.2593, Top-1 Acc: 37.94%, Top-5 Acc: 72.19%
Best Top-1: 37.94%, Best Top-5: 72.19%

Epoch 11/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.94it/s, loss=2.31, acc=37.1] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.56it/s, loss=2.17, top1=40.3, top5=74]   


Train Loss: 2.3149, Train Acc: 37.06%
Test Loss: 2.1701, Top-1 Acc: 40.31%, Top-5 Acc: 73.96%
Best Top-1: 40.31%, Best Top-5: 73.96%

Epoch 12/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:44<00:00,  8.87it/s, loss=2.24, acc=39.3] 
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.67it/s, loss=2.17, top1=41.1, top5=73.7] 


Train Loss: 2.2370, Train Acc: 39.32%
Test Loss: 2.1681, Top-1 Acc: 41.10%, Top-5 Acc: 73.65%
Best Top-1: 41.10%, Best Top-5: 73.65%

Epoch 13/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.96it/s, loss=2.15, acc=41.5] 
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.64it/s, loss=2.05, top1=43.5, top5=76.8] 


Train Loss: 2.1473, Train Acc: 41.46%
Test Loss: 2.0520, Top-1 Acc: 43.53%, Top-5 Acc: 76.78%
Best Top-1: 43.53%, Best Top-5: 76.78%

Epoch 14/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.95it/s, loss=2.07, acc=43.6] 
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.66it/s, loss=1.99, top1=44.8, top5=77.2] 


Train Loss: 2.0718, Train Acc: 43.57%
Test Loss: 1.9910, Top-1 Acc: 44.78%, Top-5 Acc: 77.19%
Best Top-1: 44.78%, Best Top-5: 77.19%

Epoch 15/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.94it/s, loss=2.01, acc=45.3] 
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.66it/s, loss=1.88, top1=47.6, top5=79.7] 


Train Loss: 2.0054, Train Acc: 45.31%
Test Loss: 1.8847, Top-1 Acc: 47.65%, Top-5 Acc: 79.67%
Best Top-1: 47.65%, Best Top-5: 79.67%

Epoch 16/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [03:37<00:00,  1.80it/s, loss=1.94, acc=46.6]   
Testing: 100%|██████████| 79/79 [02:11<00:00,  1.67s/it, loss=1.9, top1=48.1, top5=79]    


Train Loss: 1.9428, Train Acc: 46.64%
Test Loss: 1.8974, Top-1 Acc: 48.07%, Top-5 Acc: 78.98%
Best Top-1: 48.07%, Best Top-5: 78.98%

Epoch 17/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.98it/s, loss=1.9, acc=48.3]  
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.66it/s, loss=1.83, top1=49.1, top5=80.2] 


Train Loss: 1.8954, Train Acc: 48.26%
Test Loss: 1.8289, Top-1 Acc: 49.15%, Top-5 Acc: 80.21%
Best Top-1: 49.15%, Best Top-5: 80.21%

Epoch 18/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:44<00:00,  8.83it/s, loss=1.85, acc=49.4] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.41it/s, loss=1.82, top1=49.8, top5=80.1] 


Train Loss: 1.8460, Train Acc: 49.40%
Test Loss: 1.8207, Top-1 Acc: 49.80%, Top-5 Acc: 80.14%
Best Top-1: 49.80%, Best Top-5: 80.14%

Epoch 19/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:44<00:00,  8.87it/s, loss=1.8, acc=50.5]  
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.55it/s, loss=1.77, top1=50.9, top5=80.9] 


Train Loss: 1.8000, Train Acc: 50.47%
Test Loss: 1.7677, Top-1 Acc: 50.91%, Top-5 Acc: 80.92%
Best Top-1: 50.91%, Best Top-5: 80.92%

Epoch 20/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.93it/s, loss=1.76, acc=51.7] 
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.65it/s, loss=1.76, top1=51.6, top5=81.3] 


Train Loss: 1.7622, Train Acc: 51.72%
Test Loss: 1.7555, Top-1 Acc: 51.62%, Top-5 Acc: 81.26%
Best Top-1: 51.62%, Best Top-5: 81.26%

Epoch 21/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.94it/s, loss=1.72, acc=52.8] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.57it/s, loss=1.76, top1=51.8, top5=81.6] 


Train Loss: 1.7220, Train Acc: 52.79%
Test Loss: 1.7563, Top-1 Acc: 51.76%, Top-5 Acc: 81.56%
Best Top-1: 51.76%, Best Top-5: 81.56%

Epoch 22/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.93it/s, loss=1.7, acc=53.3]  
Testing: 100%|██████████| 79/79 [00:53<00:00,  1.49it/s, loss=1.73, top1=53, top5=81.5]   


Train Loss: 1.6985, Train Acc: 53.31%
Test Loss: 1.7255, Top-1 Acc: 52.99%, Top-5 Acc: 81.52%
Best Top-1: 52.99%, Best Top-5: 81.52%

Epoch 23/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:44<00:00,  8.89it/s, loss=1.67, acc=54.1] 
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.65it/s, loss=1.69, top1=53.9, top5=82.3] 


Train Loss: 1.6678, Train Acc: 54.08%
Test Loss: 1.6874, Top-1 Acc: 53.90%, Top-5 Acc: 82.31%
Best Top-1: 53.90%, Best Top-5: 82.31%

Epoch 24/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.95it/s, loss=1.65, acc=54.5] 
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.65it/s, loss=1.71, top1=53.5, top5=82]   


Train Loss: 1.6464, Train Acc: 54.51%
Test Loss: 1.7075, Top-1 Acc: 53.49%, Top-5 Acc: 82.02%
Best Top-1: 53.90%, Best Top-5: 82.31%

Epoch 25/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.95it/s, loss=1.62, acc=55.5] 
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.65it/s, loss=1.7, top1=53.6, top5=82]    


Train Loss: 1.6201, Train Acc: 55.46%
Test Loss: 1.6957, Top-1 Acc: 53.60%, Top-5 Acc: 82.03%
Best Top-1: 53.90%, Best Top-5: 82.31%

Epoch 26/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.95it/s, loss=1.59, acc=56.1] 
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.65it/s, loss=1.66, top1=55.5, top5=82.9] 


Train Loss: 1.5873, Train Acc: 56.13%
Test Loss: 1.6637, Top-1 Acc: 55.46%, Top-5 Acc: 82.87%
Best Top-1: 55.46%, Best Top-5: 82.87%

Epoch 27/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.94it/s, loss=1.57, acc=57]   
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.66it/s, loss=1.68, top1=54.5, top5=82.6] 


Train Loss: 1.5692, Train Acc: 57.00%
Test Loss: 1.6770, Top-1 Acc: 54.52%, Top-5 Acc: 82.57%
Best Top-1: 55.46%, Best Top-5: 82.87%

Epoch 28/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.95it/s, loss=1.55, acc=57.3] 
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.64it/s, loss=1.61, top1=56.3, top5=83.8] 


Train Loss: 1.5497, Train Acc: 57.28%
Test Loss: 1.6092, Top-1 Acc: 56.31%, Top-5 Acc: 83.85%
Best Top-1: 56.31%, Best Top-5: 83.85%

Epoch 29/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.95it/s, loss=1.53, acc=57.7] 
Testing: 100%|██████████| 79/79 [00:13<00:00,  5.65it/s, loss=1.6, top1=56.4, top5=83.7]  


Train Loss: 1.5313, Train Acc: 57.74%
Test Loss: 1.6033, Top-1 Acc: 56.42%, Top-5 Acc: 83.69%
Best Top-1: 56.42%, Best Top-5: 83.69%

Epoch 30/30
Learning rate: 0.001000


Training: 100%|██████████| 391/391 [00:43<00:00,  8.94it/s, loss=1.52, acc=58]   
Testing: 100%|██████████| 79/79 [00:14<00:00,  5.64it/s, loss=1.59, top1=56.6, top5=83.8] 

Train Loss: 1.5176, Train Acc: 58.00%
Test Loss: 1.5883, Top-1 Acc: 56.64%, Top-5 Acc: 83.81%
Best Top-1: 56.64%, Best Top-5: 83.81%

FINAL RESULTS COMPARISON
Model                          Top-1 Accuracy       Top-5 Accuracy      
----------------------------------------------------------------------
No BatchNorm                                 1.00%               5.00%
BatchNorm Before ReLU                       48.16%              79.88%
BatchNorm After ReLU                        56.64%              83.81%



