In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import time
import os
from tqdm import tqdm
import matplotlib.pyplot as plt
# Import data loaders from your data preparation script
import import_ipynb
import alexnet_dataTransformation 
# import nbimporter

from alexnet_dataTransformation import train_loader, val_loader, test_loader, class_to_idx
from torch.utils.tensorboard import SummaryWriter

In [14]:
from torchsummary import summary

In [5]:
# writer = SummaryWriter('runs/tensor_board')
tensor_board_log_dir = f"tensor_board_logs/runs_{time.strftime('%Y%m%d-%H%M%S')}"
writer = SummaryWriter(tensor_board_log_dir)

Bias initialization: \
• 2nd, 4th, and 5th convolutional layers with constant 1 and all fully connected 
hidden layers with 1 \
• The remaining layers with 0 

In [None]:
# AlexNet model implementation with proper initialization as per paper and asignment
# Initialize all layers with random weights taken from ~𝑁(0, 0.01) 
# Bias initialization: 
# • 2nd, 4th, and 5th convolutional layers with constant 1 and all fully connected hidden layers with 1 
# • The remaining layers with 0 
class AlexNet(nn.Module):
    def __init__(self, num_classes=100):
        super(AlexNet, self).__init__()
        
        # Features (convolutional layers)
        
        # First convolutional layer (bias=0)
        self.conv1 = nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2)
        self.relu1 = nn.ReLU(inplace=True)
        self.lrn1 = nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)
        
        # Second convolutional layer (bias=1)
        self.conv2 = nn.Conv2d(96, 256, kernel_size=5, padding=2, groups=2)
        self.relu2 = nn.ReLU(inplace=True)
        self.lrn2 = nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)
        
        # Third convolutional layer (bias=0)
        self.conv3 = nn.Conv2d(256, 384, kernel_size=3, padding=1)
        self.relu3 = nn.ReLU(inplace=True)
        
        # Fourth convolutional layer (bias=1)
        self.conv4 = nn.Conv2d(384, 384, kernel_size=3, padding=1, groups=2)
        self.relu4 = nn.ReLU(inplace=True)
        
        # Fifth convolutional layer (bias=1)
        self.conv5 = nn.Conv2d(384, 256, kernel_size=3, padding=1, groups=2)
        self.relu5 = nn.ReLU(inplace=True)
        self.pool5 = nn.MaxPool2d(kernel_size=3, stride=2)
        
        # Classifier (fully connected layers)
        self.dropout1 = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(256 * 6 * 6, 4096)
        self.relu6 = nn.ReLU(inplace=True)
        self.dropout2 = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(4096, 4096)
        self.relu7 = nn.ReLU(inplace=True)
        self.fc3 = nn.Linear(4096, num_classes)
        
        # Initialize weights according to paper specifications
        self._initialize_weights()
        
    def forward(self, x):
        # Convolutional layers
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.lrn1(x)
        x = self.pool1(x)
        
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.lrn2(x)
        x = self.pool2(x)
        
        x = self.conv3(x)
        x = self.relu3(x)
        
        x = self.conv4(x)
        x = self.relu4(x)
        
        x = self.conv5(x)
        x = self.relu5(x)
        x = self.pool5(x)
        
        # Flatten
        x = x.view(x.size(0), 256 * 6 * 6)
        
        # Fully connected layers
        x = self.dropout1(x)
        x = self.fc1(x)
        x = self.relu6(x)
        
        x = self.dropout2(x)
        x = self.fc2(x)
        x = self.relu7(x)
        
        x = self.fc3(x)
        return x
    
    def _initialize_weights(self):
        # Initialize all weights from N(0, 0.01)
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, mean=0, std=0.01)
        
        # Bias initialization according to paper specifications
        # 2nd, 4th, 5th convolutional layers with constant 1
        nn.init.constant_(self.conv2.bias, 1)
        nn.init.constant_(self.conv4.bias, 1)
        nn.init.constant_(self.conv5.bias, 1)
        
        # Fully connected hidden layers with constant 1
        nn.init.constant_(self.fc1.bias, 1)
        nn.init.constant_(self.fc2.bias, 1)
        
        # Remaining layers with 0 (already default in PyTorch)
        nn.init.constant_(self.conv1.bias, 0)
        nn.init.constant_(self.conv3.bias, 0)
        nn.init.constant_(self.fc3.bias, 0)

In [None]:
# Function to compute top-k accuracy
def accuracy(output, target, topk=(1,)):
    """
    Computes the accuracy over the k top predictions for the specified values of k
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        # _, pred = output.topk(maxk, dim = 1, largest=True, sorted = True)
        pred = pred.t()
        
        # Performs element-wise equality check between pred and expanded target
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

# Training function
def train(model, train_loader, optimizer, criterion, epoch, device):
    model.train()
    running_loss = 0.0
    correct1 = 0
    correct5 = 0
    total = 0
    
    pbar = tqdm(train_loader, desc=f'Epoch {epoch}')
    for inputs, targets in pbar:
        # Move inputs and targets to device
        inputs, targets = inputs.to(device), targets.to(device)
        
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
        
        # Statistics
        running_loss += loss.item() * inputs.size(0)
        acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
        total += targets.size(0)
        correct1 += acc1.item() * inputs.size(0) / 100
        correct5 += acc5.item() * inputs.size(0) / 100
        
        # Update progress bar
        pbar.set_postfix({
            'loss': running_loss / total,
            'top1': 100. * correct1 / total,
            'top5': 100. * correct5 / total
        })
    
    train_loss = running_loss / total
    train_acc1 = 100. * correct1 / total
    train_acc5 = 100. * correct5 / total
    train_err1 = 100. - train_acc1
    train_err5 = 100. - train_acc5
    
    return train_loss, train_acc1, train_acc5, train_err1, train_err5

# Validation function
def validate(model, val_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct1 = 0
    correct5 = 0
    total = 0
    
    with torch.no_grad():
        for inputs, targets in tqdm(val_loader, desc='Validating'):
            inputs, targets = inputs.to(device), targets.to(device)
            
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            
            running_loss += loss.item() * inputs.size(0)
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            total += targets.size(0)
            correct1 += acc1.item() * inputs.size(0) / 100
            correct5 += acc5.item() * inputs.size(0) / 100
    
    val_loss = running_loss / total
    val_acc1 = 100. * correct1 / total
    val_acc5 = 100. * correct5 / total
    val_err1 = 100. - val_acc1
    val_err5 = 100. - val_acc5
    
    return val_loss, val_acc1, val_acc5, val_err1, val_err5


In [9]:
# Function to plot and save training/validation curves
def plot_curves(train_values, val_values, ylabel, title, filename):
    plt.figure(figsize=(10, 6))
    plt.plot(train_values, label='Training')
    plt.plot(val_values, label='Validation')
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.savefig(filename)
    plt.close()
    # plt.show()

In [15]:
# Generate model parameter report
model_to_checkParameter = AlexNet(num_classes=100)
summary(model_to_checkParameter, input_size=(3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 96, 55, 55]          34,944
              ReLU-2           [-1, 96, 55, 55]               0
 LocalResponseNorm-3           [-1, 96, 55, 55]               0
         MaxPool2d-4           [-1, 96, 27, 27]               0
            Conv2d-5          [-1, 256, 27, 27]         307,456
              ReLU-6          [-1, 256, 27, 27]               0
 LocalResponseNorm-7          [-1, 256, 27, 27]               0
         MaxPool2d-8          [-1, 256, 13, 13]               0
            Conv2d-9          [-1, 384, 13, 13]         885,120
             ReLU-10          [-1, 384, 13, 13]               0
           Conv2d-11          [-1, 384, 13, 13]         663,936
             ReLU-12          [-1, 384, 13, 13]               0
           Conv2d-13          [-1, 256, 13, 13]         442,624
             ReLU-14          [-1, 256,

In [10]:
# Main training loop
def main():
    # Check if GPU is available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    
   
    # Create model
    model = AlexNet(num_classes=len(class_to_idx))
    model = model.to(device)
    
    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    
    # Following the AlexNet paper:
    # - learning rate: 0.01
    # - momentum: 0.9
    # - weight decay: 0.0005
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0005)
    
    # Create directories
    save_dir = 'alexnet_checkpoints'
    plot_dir = 'alexnet_plots'
    os.makedirs(save_dir, exist_ok=True)
    os.makedirs(plot_dir, exist_ok=True)
    
    # Training parameters
    num_epochs = 50  # AlexNet was trained for 90 epochs
    patience = 5    # Patience for learning rate reduction
    
    # Lists to track metrics
    train_losses = []
    val_losses = []
    train_accs1 = []
    val_accs1 = []
    train_accs5 = []
    val_accs5 = []
    train_errs1 = []
    val_errs1 = []
    train_errs5 = []
    val_errs5 = []
    learning_rates = []
    
    # Training loop
    best_val_acc = 0.0
    no_improve_count = 0
    current_lr = optimizer.param_groups[0]['lr']
    
    for epoch in range(1, num_epochs + 1):
        # Record current learning rate
        learning_rates.append(current_lr)
        
        # Train
        train_loss, train_acc1, train_acc5, train_err1, train_err5 = train(
            model, train_loader, optimizer, criterion, epoch, device
        )
        # Log training metrics
        writer.add_scalar('Loss/Train', train_loss, epoch)
        writer.add_scalar('Accuracy/Top1_Train', train_acc1, epoch)
        writer.add_scalar('Accuracy/Top5_Train', train_acc5, epoch)
        writer.add_scalar('Error/Top1_Train', train_err1, epoch)
        writer.add_scalar('Error/Top5_Train', train_err5, epoch)
        
        # Validate
        val_loss, val_acc1, val_acc5, val_err1, val_err5 = validate(
            model, val_loader, criterion, device
        )

        # Log validation metrics
        writer.add_scalar('Loss/Validation', val_loss, epoch)
        writer.add_scalar('Accuracy/Top1_Validation', val_acc1, epoch)
        writer.add_scalar('Accuracy/Top5_Validation', val_acc5, epoch)
        writer.add_scalar('Error/Top1_Validation', val_err1, epoch)
        writer.add_scalar('Error/Top5_Validation', val_err5, epoch)
        
        # Store metrics
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accs1.append(train_acc1)
        val_accs1.append(val_acc1)
        train_accs5.append(train_acc5)
        val_accs5.append(val_acc5)
        train_errs1.append(train_err1)
        val_errs1.append(val_err1)
        train_errs5.append(train_err5)
        val_errs5.append(val_err5)
        
        # Print statistics
        print(f'Epoch {epoch}/{num_epochs}:')
        print(f'  Training Loss: {train_loss:.4f}, Top-1 Acc: {train_acc1:.2f}%, Top-5 Acc: {train_acc5:.2f}%')
        print(f'  Validation Loss: {val_loss:.4f}, Top-1 Acc: {val_acc1:.2f}%, Top-5 Acc: {val_acc5:.2f}%')
        print(f'  Top-1 Error: {val_err1:.2f}%, Top-5 Error: {val_err5:.2f}%')
        print(f'  Learning Rate: {current_lr:.6f}')
        
        # Save checkpoint
        is_best = val_acc1 > best_val_acc
        if is_best:
            print(f'Validation accuracy improved from {best_val_acc:.2f}% to {val_acc1:.2f}%')
            best_val_acc = val_acc1
            no_improve_count = 0
                        
            checkpoint = {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_acc1': val_acc1,
                'val_acc5': val_acc5,
                'train_acc1': train_acc1,
                'train_acc5': train_acc5,
                'val_loss': val_loss,
                'train_loss': train_loss
            }
            
            torch.save(checkpoint, os.path.join(save_dir, 'best_model.pth'))
            print(f'Model saved to {os.path.join(save_dir, "best_model.pth")}')
        else:
            no_improve_count += 1
            print(f'No improvement for {no_improve_count} epochs')
        
        # Learning rate scheduling: reduce by factor of 10 if no improvement for 'patience' epochs
        if no_improve_count >= patience:
            current_lr = current_lr / 10
            for param_group in optimizer.param_groups:
                param_group['lr'] = current_lr
            print(f'Learning rate reduced to {current_lr:.6f}')
            no_improve_count = 0
        
        # Create plots after each epoch
        plot_curves(train_losses, val_losses, 'Loss', 'Training and Validation Loss', 
                    os.path.join(plot_dir, 'loss_curve.png'))
        plot_curves(train_accs1, val_accs1, 'Top-1 Accuracy (%)', 'Training and Validation Top-1 Accuracy', 
                    os.path.join(plot_dir, 'top1_acc_curve.png'))
        plot_curves(train_accs5, val_accs5, 'Top-5 Accuracy (%)', 'Training and Validation Top-5 Accuracy', 
                    os.path.join(plot_dir, 'top5_acc_curve.png'))
        plot_curves(train_errs1, val_errs1, 'Top-1 Error (%)', 'Training and Validation Top-1 Error', 
                    os.path.join(plot_dir, 'top1_err_curve.png'))
        plot_curves(train_errs5, val_errs5, 'Top-5 Error (%)', 'Training and Validation Top-5 Error', 
                    os.path.join(plot_dir, 'top5_err_curve.png'))
  
       
        # Save training statistics
        stats = {
            'epoch': list(range(1, epoch + 1)),
            'train_loss': train_losses,
            'val_loss': val_losses,
            'train_acc1': train_accs1,
            'val_acc1': val_accs1,
            'train_acc5': train_accs5,
            'val_acc5': val_accs5,
            'train_err1': train_errs1,
            'val_err1': val_errs1,
            'train_err5': train_errs5,
            'val_err5': val_errs5,
            'learning_rate': learning_rates
        }
        pd.DataFrame(stats).to_csv(os.path.join(plot_dir, 'training_stats.csv'), index=False)
    
    # Final evaluation on test set
    model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth'))['model_state_dict'])
    test_loss, test_acc1, test_acc5, test_err1, test_err5 = validate(model, test_loader, criterion, device)
    print(f'Final Test Results:')
    print(f'  Test Loss: {test_loss:.4f}')
    print(f'  Top-1 Accuracy: {test_acc1:.2f}%, Top-1 Error: {test_err1:.2f}%')
    print(f'  Top-5 Accuracy: {test_acc5:.2f}%, Top-5 Error: {test_err5:.2f}%')
    
    # Save final test results
    with open(os.path.join(plot_dir, 'test_results.txt'), 'w') as f:
        f.write(f'Test Loss: {test_loss:.4f}\n')
        f.write(f'Top-1 Accuracy: {test_acc1:.2f}%, Top-1 Error: {test_err1:.2f}%\n')
        f.write(f'Top-5 Accuracy: {test_acc5:.2f}%, Top-5 Error: {test_err5:.2f}%\n')

if __name__ == "__main__":
    import pandas as pd
    main()

Using device: cpu


Epoch 1: 100%|██████████| 235/235 [20:28<00:00,  5.23s/it, loss=4.69, top1=1.02, top5=5.05] 
Validating: 100%|██████████| 79/79 [03:06<00:00,  2.37s/it]


Epoch 1/50:
  Training Loss: 4.6944, Top-1 Acc: 1.02%, Top-5 Acc: 5.05%
  Validation Loss: 4.6066, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.010000
Validation accuracy improved from 0.00% to 1.00%
Model saved to alexnet_checkpoints\best_model.pth


Epoch 2: 100%|██████████| 235/235 [16:49<00:00,  4.29s/it, loss=4.61, top1=1.01, top5=4.9]  
Validating: 100%|██████████| 79/79 [02:44<00:00,  2.08s/it]


Epoch 2/50:
  Training Loss: 4.6082, Top-1 Acc: 1.01%, Top-5 Acc: 4.90%
  Validation Loss: 4.6057, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.010000
No improvement for 1 epochs


Epoch 3: 100%|██████████| 235/235 [19:27<00:00,  4.97s/it, loss=4.61, top1=0.987, top5=4.74]
Validating: 100%|██████████| 79/79 [02:46<00:00,  2.11s/it]


Epoch 3/50:
  Training Loss: 4.6072, Top-1 Acc: 0.99%, Top-5 Acc: 4.74%
  Validation Loss: 4.6058, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.010000
No improvement for 2 epochs


Epoch 4: 100%|██████████| 235/235 [19:06<00:00,  4.88s/it, loss=4.61, top1=0.997, top5=5.03]
Validating: 100%|██████████| 79/79 [02:45<00:00,  2.09s/it]


Epoch 4/50:
  Training Loss: 4.6064, Top-1 Acc: 1.00%, Top-5 Acc: 5.03%
  Validation Loss: 4.6055, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.010000
No improvement for 3 epochs


Epoch 5: 100%|██████████| 235/235 [19:23<00:00,  4.95s/it, loss=4.61, top1=0.843, top5=4.58]
Validating: 100%|██████████| 79/79 [02:44<00:00,  2.08s/it]


Epoch 5/50:
  Training Loss: 4.6063, Top-1 Acc: 0.84%, Top-5 Acc: 4.58%
  Validation Loss: 4.6053, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.010000
No improvement for 4 epochs


Epoch 6: 100%|██████████| 235/235 [19:26<00:00,  4.97s/it, loss=4.61, top1=0.9, top5=4.76]  
Validating: 100%|██████████| 79/79 [02:46<00:00,  2.11s/it]


Epoch 6/50:
  Training Loss: 4.6064, Top-1 Acc: 0.90%, Top-5 Acc: 4.76%
  Validation Loss: 4.6056, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.010000
No improvement for 5 epochs
Learning rate reduced to 0.001000


Epoch 7: 100%|██████████| 235/235 [18:51<00:00,  4.81s/it, loss=4.61, top1=0.913, top5=4.97]
Validating: 100%|██████████| 79/79 [02:44<00:00,  2.09s/it]


Epoch 7/50:
  Training Loss: 4.6058, Top-1 Acc: 0.91%, Top-5 Acc: 4.97%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.001000
No improvement for 1 epochs


Epoch 8: 100%|██████████| 235/235 [18:57<00:00,  4.84s/it, loss=4.61, top1=0.99, top5=5.01] 
Validating: 100%|██████████| 79/79 [02:46<00:00,  2.11s/it]


Epoch 8/50:
  Training Loss: 4.6053, Top-1 Acc: 0.99%, Top-5 Acc: 5.01%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.001000
No improvement for 2 epochs


Epoch 9: 100%|██████████| 235/235 [19:29<00:00,  4.98s/it, loss=4.61, top1=0.993, top5=4.95]
Validating: 100%|██████████| 79/79 [02:44<00:00,  2.09s/it]


Epoch 9/50:
  Training Loss: 4.6053, Top-1 Acc: 0.99%, Top-5 Acc: 4.95%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.001000
No improvement for 3 epochs


Epoch 10: 100%|██████████| 235/235 [18:57<00:00,  4.84s/it, loss=4.61, top1=0.987, top5=4.72]
Validating: 100%|██████████| 79/79 [02:45<00:00,  2.09s/it]


Epoch 10/50:
  Training Loss: 4.6053, Top-1 Acc: 0.99%, Top-5 Acc: 4.72%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.001000
No improvement for 4 epochs


Epoch 11: 100%|██████████| 235/235 [19:10<00:00,  4.90s/it, loss=4.61, top1=1.05, top5=4.89]
Validating: 100%|██████████| 79/79 [02:44<00:00,  2.08s/it]


Epoch 11/50:
  Training Loss: 4.6053, Top-1 Acc: 1.05%, Top-5 Acc: 4.89%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.001000
No improvement for 5 epochs
Learning rate reduced to 0.000100


Epoch 12: 100%|██████████| 235/235 [19:07<00:00,  4.88s/it, loss=4.61, top1=0.977, top5=5.05]
Validating: 100%|██████████| 79/79 [02:43<00:00,  2.08s/it]


Epoch 12/50:
  Training Loss: 4.6052, Top-1 Acc: 0.98%, Top-5 Acc: 5.05%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000100
No improvement for 1 epochs


Epoch 13: 100%|██████████| 235/235 [18:59<00:00,  4.85s/it, loss=4.61, top1=0.957, top5=5.06]
Validating: 100%|██████████| 79/79 [02:46<00:00,  2.10s/it]


Epoch 13/50:
  Training Loss: 4.6052, Top-1 Acc: 0.96%, Top-5 Acc: 5.06%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000100
No improvement for 2 epochs


Epoch 14: 100%|██████████| 235/235 [19:16<00:00,  4.92s/it, loss=4.61, top1=1.11, top5=4.88]
Validating: 100%|██████████| 79/79 [02:44<00:00,  2.09s/it]


Epoch 14/50:
  Training Loss: 4.6053, Top-1 Acc: 1.11%, Top-5 Acc: 4.88%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000100
No improvement for 3 epochs


Epoch 15: 100%|██████████| 235/235 [18:56<00:00,  4.84s/it, loss=4.61, top1=0.977, top5=5.04]
Validating: 100%|██████████| 79/79 [02:46<00:00,  2.11s/it]


Epoch 15/50:
  Training Loss: 4.6053, Top-1 Acc: 0.98%, Top-5 Acc: 5.04%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000100
No improvement for 4 epochs


Epoch 16: 100%|██████████| 235/235 [18:46<00:00,  4.79s/it, loss=4.61, top1=1.03, top5=5.17]
Validating: 100%|██████████| 79/79 [02:44<00:00,  2.09s/it]


Epoch 16/50:
  Training Loss: 4.6052, Top-1 Acc: 1.03%, Top-5 Acc: 5.17%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000100
No improvement for 5 epochs
Learning rate reduced to 0.000010


Epoch 17: 100%|██████████| 235/235 [19:32<00:00,  4.99s/it, loss=4.61, top1=0.94, top5=4.71] 
Validating: 100%|██████████| 79/79 [02:44<00:00,  2.08s/it]


Epoch 17/50:
  Training Loss: 4.6054, Top-1 Acc: 0.94%, Top-5 Acc: 4.71%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000010
No improvement for 1 epochs


Epoch 18: 100%|██████████| 235/235 [19:03<00:00,  4.87s/it, loss=4.61, top1=1, top5=4.83]    
Validating: 100%|██████████| 79/79 [02:44<00:00,  2.08s/it]


Epoch 18/50:
  Training Loss: 4.6053, Top-1 Acc: 1.00%, Top-5 Acc: 4.83%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000010
No improvement for 2 epochs


Epoch 19: 100%|██████████| 235/235 [19:09<00:00,  4.89s/it, loss=4.61, top1=1.02, top5=4.91] 
Validating: 100%|██████████| 79/79 [02:44<00:00,  2.08s/it]


Epoch 19/50:
  Training Loss: 4.6053, Top-1 Acc: 1.02%, Top-5 Acc: 4.91%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000010
No improvement for 3 epochs


Epoch 20: 100%|██████████| 235/235 [19:08<00:00,  4.89s/it, loss=4.61, top1=0.937, top5=4.86]
Validating: 100%|██████████| 79/79 [02:45<00:00,  2.09s/it]


Epoch 20/50:
  Training Loss: 4.6053, Top-1 Acc: 0.94%, Top-5 Acc: 4.86%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000010
No improvement for 4 epochs


Epoch 21: 100%|██████████| 235/235 [13:52<00:00,  3.54s/it, loss=4.61, top1=1.03, top5=4.89] 
Validating: 100%|██████████| 79/79 [01:54<00:00,  1.45s/it]


Epoch 21/50:
  Training Loss: 4.6053, Top-1 Acc: 1.03%, Top-5 Acc: 4.89%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000010
No improvement for 5 epochs
Learning rate reduced to 0.000001


Epoch 22: 100%|██████████| 235/235 [47:48<00:00, 12.21s/it, loss=4.61, top1=1.07, top5=4.9]     
Validating: 100%|██████████| 79/79 [03:40<00:00,  2.79s/it]


Epoch 22/50:
  Training Loss: 4.6052, Top-1 Acc: 1.07%, Top-5 Acc: 4.90%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000001
No improvement for 1 epochs


Epoch 23: 100%|██████████| 235/235 [15:34<00:00,  3.98s/it, loss=4.61, top1=1.02, top5=4.91]
Validating: 100%|██████████| 79/79 [02:00<00:00,  1.52s/it]


Epoch 23/50:
  Training Loss: 4.6052, Top-1 Acc: 1.02%, Top-5 Acc: 4.91%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000001
No improvement for 2 epochs


Epoch 24: 100%|██████████| 235/235 [18:29<00:00,  4.72s/it, loss=4.61, top1=0.967, top5=4.98]
Validating: 100%|██████████| 79/79 [01:54<00:00,  1.45s/it]


Epoch 24/50:
  Training Loss: 4.6053, Top-1 Acc: 0.97%, Top-5 Acc: 4.98%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000001
No improvement for 3 epochs


Epoch 25: 100%|██████████| 235/235 [4:53:31<00:00, 74.94s/it, loss=4.61, top1=0.987, top5=5.11]     
Validating: 100%|██████████| 79/79 [05:44<00:00,  4.37s/it]


Epoch 25/50:
  Training Loss: 4.6052, Top-1 Acc: 0.99%, Top-5 Acc: 5.11%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000001
No improvement for 4 epochs


Epoch 26: 100%|██████████| 235/235 [23:20<00:00,  5.96s/it, loss=4.61, top1=1.04, top5=5.13]
Validating: 100%|██████████| 79/79 [01:54<00:00,  1.45s/it]


Epoch 26/50:
  Training Loss: 4.6053, Top-1 Acc: 1.04%, Top-5 Acc: 5.13%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000001
No improvement for 5 epochs
Learning rate reduced to 0.000000


Epoch 27: 100%|██████████| 235/235 [18:04<00:00,  4.61s/it, loss=4.61, top1=1.12, top5=5.05]
Validating: 100%|██████████| 79/79 [02:55<00:00,  2.23s/it]


Epoch 27/50:
  Training Loss: 4.6053, Top-1 Acc: 1.12%, Top-5 Acc: 5.05%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 1 epochs


Epoch 28: 100%|██████████| 235/235 [18:43<00:00,  4.78s/it, loss=4.61, top1=1, top5=4.88]    
Validating: 100%|██████████| 79/79 [02:12<00:00,  1.68s/it]


Epoch 28/50:
  Training Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 4.88%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 2 epochs


Epoch 29: 100%|██████████| 235/235 [16:08<00:00,  4.12s/it, loss=4.61, top1=1.07, top5=5.07]
Validating: 100%|██████████| 79/79 [02:14<00:00,  1.70s/it]


Epoch 29/50:
  Training Loss: 4.6052, Top-1 Acc: 1.07%, Top-5 Acc: 5.07%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 3 epochs


Epoch 30: 100%|██████████| 235/235 [19:46<00:00,  5.05s/it, loss=4.61, top1=1, top5=4.94]    
Validating: 100%|██████████| 79/79 [03:42<00:00,  2.81s/it]


Epoch 30/50:
  Training Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 4.94%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 4 epochs


Epoch 31: 100%|██████████| 235/235 [25:22<00:00,  6.48s/it, loss=4.61, top1=1.01, top5=5.04] 
Validating: 100%|██████████| 79/79 [04:22<00:00,  3.32s/it]


Epoch 31/50:
  Training Loss: 4.6052, Top-1 Acc: 1.01%, Top-5 Acc: 5.04%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 5 epochs
Learning rate reduced to 0.000000


Epoch 32: 100%|██████████| 235/235 [24:35<00:00,  6.28s/it, loss=4.61, top1=1.07, top5=5.19] 
Validating: 100%|██████████| 79/79 [02:44<00:00,  2.08s/it]


Epoch 32/50:
  Training Loss: 4.6052, Top-1 Acc: 1.07%, Top-5 Acc: 5.19%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 1 epochs


Epoch 33: 100%|██████████| 235/235 [22:10<00:00,  5.66s/it, loss=4.61, top1=1.01, top5=5.07] 
Validating: 100%|██████████| 79/79 [03:52<00:00,  2.94s/it]


Epoch 33/50:
  Training Loss: 4.6053, Top-1 Acc: 1.01%, Top-5 Acc: 5.07%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 2 epochs


Epoch 34: 100%|██████████| 235/235 [27:55<00:00,  7.13s/it, loss=4.61, top1=1.1, top5=4.95]  
Validating: 100%|██████████| 79/79 [06:00<00:00,  4.57s/it]


Epoch 34/50:
  Training Loss: 4.6052, Top-1 Acc: 1.10%, Top-5 Acc: 4.95%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 3 epochs


Epoch 35: 100%|██████████| 235/235 [27:48<00:00,  7.10s/it, loss=4.61, top1=0.953, top5=4.96]
Validating: 100%|██████████| 79/79 [05:40<00:00,  4.31s/it]


Epoch 35/50:
  Training Loss: 4.6052, Top-1 Acc: 0.95%, Top-5 Acc: 4.96%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 4 epochs


Epoch 36: 100%|██████████| 235/235 [25:05<00:00,  6.40s/it, loss=4.61, top1=1.01, top5=4.85] 
Validating: 100%|██████████| 79/79 [04:12<00:00,  3.20s/it]


Epoch 36/50:
  Training Loss: 4.6053, Top-1 Acc: 1.01%, Top-5 Acc: 4.85%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 5 epochs
Learning rate reduced to 0.000000


Epoch 37: 100%|██████████| 235/235 [49:00<00:00, 12.51s/it, loss=4.61, top1=1.05, top5=4.96]   
Validating: 100%|██████████| 79/79 [03:02<00:00,  2.31s/it]


Epoch 37/50:
  Training Loss: 4.6053, Top-1 Acc: 1.05%, Top-5 Acc: 4.96%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 1 epochs


Epoch 38: 100%|██████████| 235/235 [16:22<00:00,  4.18s/it, loss=4.61, top1=1.05, top5=5.16]
Validating: 100%|██████████| 79/79 [01:57<00:00,  1.49s/it]


Epoch 38/50:
  Training Loss: 4.6053, Top-1 Acc: 1.05%, Top-5 Acc: 5.16%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 2 epochs


Epoch 39: 100%|██████████| 235/235 [14:03<00:00,  3.59s/it, loss=4.61, top1=1.02, top5=5.22] 
Validating: 100%|██████████| 79/79 [01:56<00:00,  1.48s/it]


Epoch 39/50:
  Training Loss: 4.6052, Top-1 Acc: 1.02%, Top-5 Acc: 5.22%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 3 epochs


Epoch 40: 100%|██████████| 235/235 [14:10<00:00,  3.62s/it, loss=4.61, top1=1, top5=5.1]     
Validating: 100%|██████████| 79/79 [02:11<00:00,  1.66s/it]


Epoch 40/50:
  Training Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.10%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 4 epochs


Epoch 41: 100%|██████████| 235/235 [15:56<00:00,  4.07s/it, loss=4.61, top1=1.03, top5=5.16] 
Validating: 100%|██████████| 79/79 [02:09<00:00,  1.65s/it]


Epoch 41/50:
  Training Loss: 4.6052, Top-1 Acc: 1.03%, Top-5 Acc: 5.16%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 5 epochs
Learning rate reduced to 0.000000


Epoch 42: 100%|██████████| 235/235 [16:55<00:00,  4.32s/it, loss=4.61, top1=0.97, top5=5.01] 
Validating: 100%|██████████| 79/79 [03:22<00:00,  2.56s/it]


Epoch 42/50:
  Training Loss: 4.6052, Top-1 Acc: 0.97%, Top-5 Acc: 5.01%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 1 epochs


Epoch 43: 100%|██████████| 235/235 [21:06<00:00,  5.39s/it, loss=4.61, top1=0.94, top5=4.96] 
Validating: 100%|██████████| 79/79 [03:36<00:00,  2.74s/it]


Epoch 43/50:
  Training Loss: 4.6053, Top-1 Acc: 0.94%, Top-5 Acc: 4.96%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 2 epochs


Epoch 44: 100%|██████████| 235/235 [26:31<00:00,  6.77s/it, loss=4.61, top1=0.98, top5=5.07] 
Validating: 100%|██████████| 79/79 [04:39<00:00,  3.53s/it]


Epoch 44/50:
  Training Loss: 4.6052, Top-1 Acc: 0.98%, Top-5 Acc: 5.07%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 3 epochs


Epoch 45: 100%|██████████| 235/235 [25:23<00:00,  6.48s/it, loss=4.61, top1=0.94, top5=4.68] 
Validating: 100%|██████████| 79/79 [04:57<00:00,  3.76s/it]


Epoch 45/50:
  Training Loss: 4.6053, Top-1 Acc: 0.94%, Top-5 Acc: 4.68%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 4 epochs


Epoch 46: 100%|██████████| 235/235 [23:07<00:00,  5.90s/it, loss=4.61, top1=1.04, top5=4.91] 
Validating: 100%|██████████| 79/79 [03:20<00:00,  2.54s/it]


Epoch 46/50:
  Training Loss: 4.6053, Top-1 Acc: 1.04%, Top-5 Acc: 4.91%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 5 epochs
Learning rate reduced to 0.000000


Epoch 47: 100%|██████████| 235/235 [20:26<00:00,  5.22s/it, loss=4.61, top1=1.01, top5=4.98] 
Validating: 100%|██████████| 79/79 [04:12<00:00,  3.20s/it]


Epoch 47/50:
  Training Loss: 4.6053, Top-1 Acc: 1.01%, Top-5 Acc: 4.98%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 1 epochs


Epoch 48: 100%|██████████| 235/235 [26:50<00:00,  6.85s/it, loss=4.61, top1=1.07, top5=5.36]
Validating: 100%|██████████| 79/79 [05:15<00:00,  3.99s/it]


Epoch 48/50:
  Training Loss: 4.6052, Top-1 Acc: 1.07%, Top-5 Acc: 5.36%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 2 epochs


Epoch 49: 100%|██████████| 235/235 [28:59<00:00,  7.40s/it, loss=4.61, top1=1.08, top5=4.91] 
Validating: 100%|██████████| 79/79 [03:24<00:00,  2.59s/it]


Epoch 49/50:
  Training Loss: 4.6053, Top-1 Acc: 1.08%, Top-5 Acc: 4.91%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 3 epochs


Epoch 50: 100%|██████████| 235/235 [19:00<00:00,  4.85s/it, loss=4.61, top1=0.977, top5=5.07]
Validating: 100%|██████████| 79/79 [02:58<00:00,  2.26s/it]


Epoch 50/50:
  Training Loss: 4.6051, Top-1 Acc: 0.98%, Top-5 Acc: 5.07%
  Validation Loss: 4.6052, Top-1 Acc: 1.00%, Top-5 Acc: 5.00%
  Top-1 Error: 99.00%, Top-5 Error: 95.00%
  Learning Rate: 0.000000
No improvement for 4 epochs


  model.load_state_dict(torch.load(os.path.join(save_dir, 'best_model.pth'))['model_state_dict'])
Validating: 100%|██████████| 79/79 [03:42<00:00,  2.82s/it]

Final Test Results:
  Test Loss: 4.6066
  Top-1 Accuracy: 1.00%, Top-1 Error: 99.00%
  Top-5 Accuracy: 5.00%, Top-5 Error: 95.00%



