In [1]:
import torch

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

## Custom CNN Network using VGG Blocks

In [2]:
import torch.nn as nn
import torch.nn.functional as F

class CustomVGG(nn.Module):
    def __init__(self, num_classes=10):
        super(CustomVGG, self).__init__()
        
        # VGG blocks
        self.block1 = self._make_vgg_block(3, 64, 2)
        self.block2 = self._make_vgg_block(64, 128, 2)
        self.block3 = self._make_vgg_block(128, 256, 3)
        self.block4 = self._make_vgg_block(256, 512, 3)
        self.block5 = self._make_vgg_block(512, 512, 3)
        
        # Fully connected layers
        self.fc1 = nn.Linear(512 * 7 * 7, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, num_classes)
        
        # Dropout layers
        self.dropout = nn.Dropout(0.5)
        
    def _make_vgg_block(self, in_channels, out_channels, num_convs):
        layers = []
        for _ in range(num_convs):
            layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
            layers.append(nn.ReLU(inplace=True))
            in_channels = out_channels
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*layers)
    
    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)
        
        x = torch.flatten(x, 1)
        
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.fc3(x)
        
        return x

## Data Loader

In [3]:
import os
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

data_dir = 'ImageNet/'
def get_data_loaders(data_dir, batch_size):
    train_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    val_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    train_dataset = datasets.ImageFolder(os.path.join(data_dir, 'train'), transform=train_transforms)
    val_dataset = datasets.ImageFolder(os.path.join(data_dir, 'val'), transform=val_transforms)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
    return train_loader, val_loader

## Optimizers

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

def load_resnet50(num_classes):
    # Load pre-trained ResNet50
    model =  models.resnet50(weights='ResNet50_Weights.DEFAULT')
    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, num_classes)
    return model

def load_vgg16(num_classes):
    model = models.vgg16(weights='VGG16_Weights.DEFAULT')
    num_features = model.classifier[6].in_features
    model.classifier[6] = nn.Linear(num_features, num_classes)
    return model

def load_vgg19(num_classes):
    model = models.vgg19(weights='VGG19_Weights.DEFAULT')
    num_features = model.classifier[6].in_features
    model.classifier[6] = nn.Linear(num_features, num_classes)
    return model

In [5]:
import torch.optim as optim

# Function to create and return the model and optimizer
def create_model_optimizer(optimizer_name, lr):
    # model = CustomVGG(num_classes=10).to(device)
    # model = load_resnet50(num_classes=10).to(device)
    model = load_vgg16(num_classes=10).to(device)
    # model = load_vgg19(num_classes=10).to(device)
    optimizers = {
        'SGD': optim.SGD(model.parameters(), lr=lr, momentum=0.9),
        'Adam': optim.Adam(model.parameters(), lr=lr),
        'RMSprop': optim.RMSprop(model.parameters(), lr=lr),
        'AdamW': optim.AdamW(model.parameters(), lr=lr),
        'Adamax': optim.Adamax(model.parameters(), lr=lr),
        'SparseAdam': optim.SparseAdam(model.parameters(), lr=lr),
        'RAdam': optim.RAdam(model.parameters(), lr=lr),
        'NAdam': optim.NAdam(model.parameters(), lr=lr)
    }
    optimizer = optimizers[optimizer_name]
    return model, optimizer

# Training Loop

In [6]:
import pandas as pd
from tqdm import tqdm
from torch.cuda.amp import GradScaler, autocast

# Function to train and evaluate the model
def train_and_evaluate(optimizer_name, lr, train_loader, val_loader):
    model, optimizer = create_model_optimizer(optimizer_name, lr)
    criterion = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    scaler = GradScaler()

    best_val_acc = 0
    best_model_wts = None

    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []

    # Initialize lists to store epoch-wise data
    epoch_data = {
        'Epoch': [],
        'Train Loss': [],
        'Train Acc': [],
        'Val Loss': [],
        'Val Acc': []
    }

    for epoch in range(num_epochs):
        model.train()
        running_loss, correct, total = 0.0, 0, 0

        loop = tqdm(train_loader, desc=f'Epoch [{epoch+1}/{num_epochs}]', leave=True)
        for inputs, labels in loop:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            with autocast():
                outputs = model(inputs)
                loss = criterion(outputs, labels)
            
            scaler.scale(loss).backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            loop.set_postfix(loss=loss.item())
        
        train_loss = running_loss / len(train_loader)
        train_accuracy = 100 * correct / total
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)

        val_loss, val_correct, val_total = 0.0, 0, 0
        model.eval()
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, predicted = torch.max(outputs.data, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()
        
        val_loss /= len(val_loader)
        val_accuracy = 100 * val_correct / val_total
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)

        scheduler.step()

        # Add epoch-wise data to the dictionary
        epoch_data['Epoch'].append(epoch + 1)
        epoch_data['Train Loss'].append(train_loss)
        epoch_data['Train Acc'].append(train_accuracy)
        epoch_data['Val Loss'].append(val_loss)
        epoch_data['Val Acc'].append(val_accuracy)

        print(f'Epoch [{epoch+1}/{num_epochs}] - Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}%, Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.2f}%')

        if val_accuracy > best_val_acc:
            best_val_acc = val_accuracy
            best_model_wts = model.state_dict().copy()
      
    print(f'Best Validation Accuracy: {best_val_acc:.2f}%')
    if best_model_wts:
        model.load_state_dict(best_model_wts)
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'best_val_acc': best_val_acc
    }, f'model_{optimizer_name}_batch_size_{batch_size}.pth')

    # Convert epoch-wise data dictionary to DataFrame
    df = pd.DataFrame(epoch_data)

    # Save DataFrame to CSV file
    df.to_csv(f'epoch_data_{optimizer_name}_batch_size_{batch_size}.csv', index=False)

    return train_losses, val_losses, train_accuracies, val_accuracies

In [7]:
# List of optimizers to be used
# optimizers_list = ['SGD', 'Adam', 'RMSprop', 'AdamW', 'Adamax', 'SparseAdam', 'RAdam', 'NAdam']
optimizers_list = ['SGD', 'Adam', 'RMSprop', 'AdamW', 'Adamax', 'RAdam', 'NAdam']
# optimizers_list = ['SparseAdam']

# List of batch sizes to be used
batch_sizes = [8, 16, 32]
lr = 0.0001
num_epochs = 10

# Train and evaluate for different optimizers and batch sizes
loss_dict, acc_dict = {}, {}

for optimizer_name in optimizers_list:
    for batch_size in batch_sizes:
        print(f'Training with optimizer {optimizer_name} and batch size {batch_size}')
        train_loader, val_loader = get_data_loaders(data_dir, batch_size)
        train_losses, val_losses, train_accuracies, val_accuracies = train_and_evaluate(optimizer_name, lr, train_loader, val_loader)
        key = f'{optimizer_name}_bs{batch_size}'
        loss_dict[key] = {'train': train_losses, 'val': val_losses}
        acc_dict[key] = {'train': train_accuracies, 'val': val_accuracies}

Training with optimizer SGD and batch size 8


Epoch [1/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:13<00:00,  9.39it/s, loss=2.81]


Epoch [1/10] - Train Loss: 2.6597, Train Acc: 7.70%, Val Loss: 2.5038, Val Acc: 8.00%


Epoch [2/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:13<00:00,  9.24it/s, loss=3.05]


Epoch [2/10] - Train Loss: 2.6641, Train Acc: 7.20%, Val Loss: 2.5038, Val Acc: 8.00%


Epoch [3/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:13<00:00,  9.23it/s, loss=2.21]


Epoch [3/10] - Train Loss: 2.6656, Train Acc: 8.80%, Val Loss: 2.5037, Val Acc: 8.00%


Epoch [4/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:13<00:00,  9.16it/s, loss=2.81]


Epoch [4/10] - Train Loss: 2.6625, Train Acc: 8.50%, Val Loss: 2.5037, Val Acc: 8.00%


Epoch [5/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:13<00:00,  9.25it/s, loss=2.37]


Epoch [5/10] - Train Loss: 2.6618, Train Acc: 8.10%, Val Loss: 2.5037, Val Acc: 8.00%


Epoch [6/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:13<00:00,  9.35it/s, loss=2.24]


Epoch [6/10] - Train Loss: 2.6869, Train Acc: 8.80%, Val Loss: 2.5037, Val Acc: 8.00%


Epoch [7/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:13<00:00,  9.17it/s, loss=2.39]


Epoch [7/10] - Train Loss: 2.6707, Train Acc: 8.20%, Val Loss: 2.5037, Val Acc: 8.00%


Epoch [8/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:13<00:00,  9.09it/s, loss=2.78]


Epoch [8/10] - Train Loss: 2.6611, Train Acc: 8.50%, Val Loss: 2.5037, Val Acc: 8.00%


Epoch [9/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:13<00:00,  9.36it/s, loss=2.92]


Epoch [9/10] - Train Loss: 2.6387, Train Acc: 6.90%, Val Loss: 2.5037, Val Acc: 8.00%


Epoch [10/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:13<00:00,  9.41it/s, loss=3.03]


Epoch [10/10] - Train Loss: 2.6879, Train Acc: 7.80%, Val Loss: 2.5037, Val Acc: 8.00%
Best Validation Accuracy: 8.00%
Training with optimizer SGD and batch size 16


Epoch [1/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:11<00:00,  5.71it/s, loss=2.58]


Epoch [1/10] - Train Loss: 2.6958, Train Acc: 9.80%, Val Loss: 2.5407, Val Acc: 9.00%


Epoch [2/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:10<00:00,  5.84it/s, loss=2.87]


Epoch [2/10] - Train Loss: 2.6630, Train Acc: 10.30%, Val Loss: 2.5407, Val Acc: 9.00%


Epoch [3/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:10<00:00,  5.77it/s, loss=3.01]


Epoch [3/10] - Train Loss: 2.6825, Train Acc: 9.70%, Val Loss: 2.5407, Val Acc: 9.00%


Epoch [4/10]: 100%|██████████████████████████████████████████████████████████| 63/63 [00:10<00:00,  5.78it/s, loss=2.5]


Epoch [4/10] - Train Loss: 2.6880, Train Acc: 10.40%, Val Loss: 2.5406, Val Acc: 9.00%


Epoch [5/10]: 100%|████████████████████████████████████████████████████████████| 63/63 [00:11<00:00,  5.68it/s, loss=3]


Epoch [5/10] - Train Loss: 2.6882, Train Acc: 9.60%, Val Loss: 2.5406, Val Acc: 9.00%


Epoch [6/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:10<00:00,  5.77it/s, loss=2.81]


Epoch [6/10] - Train Loss: 2.6882, Train Acc: 9.60%, Val Loss: 2.5406, Val Acc: 9.00%


Epoch [7/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:10<00:00,  5.80it/s, loss=2.83]


Epoch [7/10] - Train Loss: 2.6820, Train Acc: 9.60%, Val Loss: 2.5406, Val Acc: 9.00%


Epoch [8/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:11<00:00,  5.71it/s, loss=2.25]


Epoch [8/10] - Train Loss: 2.6558, Train Acc: 11.30%, Val Loss: 2.5406, Val Acc: 9.00%


Epoch [9/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:10<00:00,  5.75it/s, loss=2.51]


Epoch [9/10] - Train Loss: 2.7000, Train Acc: 8.30%, Val Loss: 2.5406, Val Acc: 9.00%


Epoch [10/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:10<00:00,  5.75it/s, loss=2.34]


Epoch [10/10] - Train Loss: 2.7057, Train Acc: 9.50%, Val Loss: 2.5406, Val Acc: 9.00%
Best Validation Accuracy: 9.00%
Training with optimizer SGD and batch size 32


Epoch [1/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:09<00:00,  3.28it/s, loss=2.61]


Epoch [1/10] - Train Loss: 2.4492, Train Acc: 12.00%, Val Loss: 2.4753, Val Acc: 17.00%


Epoch [2/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:09<00:00,  3.32it/s, loss=2.44]


Epoch [2/10] - Train Loss: 2.4584, Train Acc: 10.10%, Val Loss: 2.4753, Val Acc: 17.00%


Epoch [3/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:09<00:00,  3.37it/s, loss=2.68]


Epoch [3/10] - Train Loss: 2.5156, Train Acc: 10.70%, Val Loss: 2.4753, Val Acc: 17.00%


Epoch [4/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:09<00:00,  3.31it/s, loss=2.83]


Epoch [4/10] - Train Loss: 2.4462, Train Acc: 11.00%, Val Loss: 2.4753, Val Acc: 17.00%


Epoch [5/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:09<00:00,  3.29it/s, loss=2.57]


Epoch [5/10] - Train Loss: 2.4756, Train Acc: 11.70%, Val Loss: 2.4753, Val Acc: 17.00%


Epoch [6/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:09<00:00,  3.37it/s, loss=2.23]


Epoch [6/10] - Train Loss: 2.4839, Train Acc: 10.20%, Val Loss: 2.4753, Val Acc: 17.00%


Epoch [7/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:09<00:00,  3.34it/s, loss=2.55]


Epoch [7/10] - Train Loss: 2.5049, Train Acc: 11.30%, Val Loss: 2.4753, Val Acc: 17.00%


Epoch [8/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:09<00:00,  3.36it/s, loss=3.07]


Epoch [8/10] - Train Loss: 2.4911, Train Acc: 11.00%, Val Loss: 2.4753, Val Acc: 17.00%


Epoch [9/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:09<00:00,  3.31it/s, loss=2.69]


Epoch [9/10] - Train Loss: 2.4778, Train Acc: 9.40%, Val Loss: 2.4753, Val Acc: 17.00%


Epoch [10/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:09<00:00,  3.35it/s, loss=2.51]


Epoch [10/10] - Train Loss: 2.4603, Train Acc: 10.50%, Val Loss: 2.4753, Val Acc: 17.00%
Best Validation Accuracy: 17.00%
Training with optimizer Adam and batch size 8


Epoch [1/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:16<00:00,  7.78it/s, loss=0.621]


Epoch [1/10] - Train Loss: 0.9686, Train Acc: 66.20%, Val Loss: 0.7749, Val Acc: 77.00%


Epoch [2/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:16<00:00,  7.75it/s, loss=0.768]


Epoch [2/10] - Train Loss: 0.5352, Train Acc: 85.00%, Val Loss: 0.9023, Val Acc: 72.00%


Epoch [3/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:15<00:00,  7.82it/s, loss=0.399]


Epoch [3/10] - Train Loss: 0.6601, Train Acc: 84.40%, Val Loss: 0.8645, Val Acc: 74.00%


Epoch [4/10]: 100%|█████████████████████████████████████████████████████| 125/125 [00:16<00:00,  7.79it/s, loss=0.0794]


Epoch [4/10] - Train Loss: 0.4653, Train Acc: 86.60%, Val Loss: 0.8991, Val Acc: 83.00%


Epoch [5/10]: 100%|███████████████████████████████████████████████████| 125/125 [00:16<00:00,  7.70it/s, loss=0.000834]


Epoch [5/10] - Train Loss: 0.6413, Train Acc: 92.80%, Val Loss: 1.4005, Val Acc: 81.00%


Epoch [6/10]: 100%|████████████████████████████████████████████████████| 125/125 [00:16<00:00,  7.74it/s, loss=1.64e-6]


Epoch [6/10] - Train Loss: 0.1512, Train Acc: 98.00%, Val Loss: 1.0343, Val Acc: 86.00%


Epoch [7/10]: 100%|██████████████████████████████████████████████████████████| 125/125 [00:15<00:00,  7.83it/s, loss=0]


Epoch [7/10] - Train Loss: 0.0318, Train Acc: 99.60%, Val Loss: 1.0553, Val Acc: 90.00%


Epoch [8/10]: 100%|████████████████████████████████████████████████████| 125/125 [00:16<00:00,  7.76it/s, loss=2.54e-5]


Epoch [8/10] - Train Loss: 0.0054, Train Acc: 99.70%, Val Loss: 0.9468, Val Acc: 92.00%


Epoch [9/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:16<00:00,  7.67it/s, loss=0.147]


Epoch [9/10] - Train Loss: 0.0017, Train Acc: 100.00%, Val Loss: 1.1559, Val Acc: 90.00%


Epoch [10/10]: 100%|█████████████████████████████████████████████████████████| 125/125 [00:16<00:00,  7.57it/s, loss=0]


Epoch [10/10] - Train Loss: 0.0051, Train Acc: 99.90%, Val Loss: 1.2916, Val Acc: 90.00%
Best Validation Accuracy: 92.00%
Training with optimizer Adam and batch size 16


Epoch [1/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:12<00:00,  5.10it/s, loss=0.286]


Epoch [1/10] - Train Loss: 1.1701, Train Acc: 60.70%, Val Loss: 0.4890, Val Acc: 82.00%


Epoch [2/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:12<00:00,  5.13it/s, loss=0.858]


Epoch [2/10] - Train Loss: 0.4724, Train Acc: 84.90%, Val Loss: 0.3399, Val Acc: 90.00%


Epoch [3/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:12<00:00,  5.08it/s, loss=0.896]


Epoch [3/10] - Train Loss: 0.2878, Train Acc: 92.20%, Val Loss: 0.5725, Val Acc: 83.00%


Epoch [4/10]: 100%|██████████████████████████████████████████████████████| 63/63 [00:12<00:00,  5.11it/s, loss=5.26e-6]


Epoch [4/10] - Train Loss: 0.1696, Train Acc: 93.60%, Val Loss: 0.4540, Val Acc: 88.00%


Epoch [5/10]: 100%|██████████████████████████████████████████████████████| 63/63 [00:12<00:00,  5.16it/s, loss=0.00375]


Epoch [5/10] - Train Loss: 0.1859, Train Acc: 96.10%, Val Loss: 0.3169, Val Acc: 89.00%


Epoch [6/10]: 100%|██████████████████████████████████████████████████████| 63/63 [00:12<00:00,  5.09it/s, loss=2.98e-8]


Epoch [6/10] - Train Loss: 0.0123, Train Acc: 99.70%, Val Loss: 0.6003, Val Acc: 91.00%


Epoch [7/10]: 100%|██████████████████████████████████████████████████████| 63/63 [00:12<00:00,  5.17it/s, loss=1.64e-7]


Epoch [7/10] - Train Loss: 0.0043, Train Acc: 99.90%, Val Loss: 0.5147, Val Acc: 93.00%


Epoch [8/10]: 100%|██████████████████████████████████████████████████████| 63/63 [00:12<00:00,  5.15it/s, loss=1.49e-8]


Epoch [8/10] - Train Loss: 0.0081, Train Acc: 99.80%, Val Loss: 0.6407, Val Acc: 91.00%


Epoch [9/10]: 100%|████████████████████████████████████████████████████████████| 63/63 [00:12<00:00,  5.13it/s, loss=0]


Epoch [9/10] - Train Loss: 0.0005, Train Acc: 100.00%, Val Loss: 0.7404, Val Acc: 94.00%


Epoch [10/10]: 100%|███████████████████████████████████████████████████████████| 63/63 [00:12<00:00,  5.15it/s, loss=0]


Epoch [10/10] - Train Loss: 0.0000, Train Acc: 100.00%, Val Loss: 0.8635, Val Acc: 93.00%
Best Validation Accuracy: 94.00%
Training with optimizer Adam and batch size 32


Epoch [1/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:10<00:00,  3.06it/s, loss=0.501]


Epoch [1/10] - Train Loss: 1.2182, Train Acc: 57.30%, Val Loss: 0.3978, Val Acc: 82.00%


Epoch [2/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:10<00:00,  3.14it/s, loss=0.153]


Epoch [2/10] - Train Loss: 0.3744, Train Acc: 86.60%, Val Loss: 0.3791, Val Acc: 84.00%


Epoch [3/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:10<00:00,  3.06it/s, loss=0.214]


Epoch [3/10] - Train Loss: 0.1690, Train Acc: 95.10%, Val Loss: 0.2768, Val Acc: 86.00%


Epoch [4/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:10<00:00,  3.06it/s, loss=0.101]


Epoch [4/10] - Train Loss: 0.1336, Train Acc: 95.90%, Val Loss: 0.3887, Val Acc: 87.00%


Epoch [5/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:10<00:00,  3.10it/s, loss=0.754]


Epoch [5/10] - Train Loss: 0.1589, Train Acc: 96.30%, Val Loss: 0.4107, Val Acc: 89.00%


Epoch [6/10]: 100%|██████████████████████████████████████████████████████| 32/32 [00:10<00:00,  3.12it/s, loss=9.72e-6]


Epoch [6/10] - Train Loss: 0.0338, Train Acc: 98.40%, Val Loss: 0.2319, Val Acc: 92.00%


Epoch [7/10]: 100%|██████████████████████████████████████████████████████| 32/32 [00:10<00:00,  3.13it/s, loss=3.43e-6]


Epoch [7/10] - Train Loss: 0.0067, Train Acc: 99.60%, Val Loss: 0.0886, Val Acc: 94.00%


Epoch [8/10]: 100%|██████████████████████████████████████████████████████| 32/32 [00:10<00:00,  3.12it/s, loss=4.93e-6]


Epoch [8/10] - Train Loss: 0.0052, Train Acc: 99.90%, Val Loss: 0.1242, Val Acc: 94.00%


Epoch [9/10]: 100%|██████████████████████████████████████████████████████| 32/32 [00:10<00:00,  3.07it/s, loss=4.47e-8]


Epoch [9/10] - Train Loss: 0.0078, Train Acc: 99.70%, Val Loss: 0.1852, Val Acc: 94.00%


Epoch [10/10]: 100%|████████████████████████████████████████████████████| 32/32 [00:10<00:00,  3.11it/s, loss=0.000348]


Epoch [10/10] - Train Loss: 0.0004, Train Acc: 100.00%, Val Loss: 0.2251, Val Acc: 93.00%
Best Validation Accuracy: 94.00%
Training with optimizer RMSprop and batch size 8


Epoch [1/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:14<00:00,  8.41it/s, loss=1.01]


Epoch [1/10] - Train Loss: 1.2598, Train Acc: 59.80%, Val Loss: 0.5953, Val Acc: 80.00%


Epoch [2/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:14<00:00,  8.46it/s, loss=0.115]


Epoch [2/10] - Train Loss: 0.5464, Train Acc: 84.10%, Val Loss: 0.5229, Val Acc: 83.00%


Epoch [3/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:14<00:00,  8.51it/s, loss=0.305]


Epoch [3/10] - Train Loss: 0.3942, Train Acc: 89.60%, Val Loss: 0.6234, Val Acc: 85.00%


Epoch [4/10]: 100%|█████████████████████████████████████████████████████| 125/125 [00:14<00:00,  8.43it/s, loss=0.0116]


Epoch [4/10] - Train Loss: 0.3989, Train Acc: 93.90%, Val Loss: 1.1444, Val Acc: 85.00%


Epoch [5/10]: 100%|████████████████████████████████████████████████████| 125/125 [00:14<00:00,  8.40it/s, loss=5.04e-6]


Epoch [5/10] - Train Loss: 0.5660, Train Acc: 92.40%, Val Loss: 1.1175, Val Acc: 86.00%


Epoch [6/10]: 100%|██████████████████████████████████████████████████████████| 125/125 [00:14<00:00,  8.45it/s, loss=0]


Epoch [6/10] - Train Loss: 0.0753, Train Acc: 99.10%, Val Loss: 0.6339, Val Acc: 91.00%


Epoch [7/10]: 100%|██████████████████████████████████████████████████████████| 125/125 [00:14<00:00,  8.47it/s, loss=0]


Epoch [7/10] - Train Loss: 0.0137, Train Acc: 99.90%, Val Loss: 0.8491, Val Acc: 92.00%


Epoch [8/10]: 100%|██████████████████████████████████████████████████████████| 125/125 [00:14<00:00,  8.54it/s, loss=0]


Epoch [8/10] - Train Loss: 0.0083, Train Acc: 99.80%, Val Loss: 1.3056, Val Acc: 91.00%


Epoch [9/10]: 100%|██████████████████████████████████████████████████████████| 125/125 [00:14<00:00,  8.49it/s, loss=0]


Epoch [9/10] - Train Loss: 0.0015, Train Acc: 99.90%, Val Loss: 1.4706, Val Acc: 90.00%


Epoch [10/10]: 100%|█████████████████████████████████████████████████████████| 125/125 [00:14<00:00,  8.51it/s, loss=0]


Epoch [10/10] - Train Loss: 0.0000, Train Acc: 100.00%, Val Loss: 1.3566, Val Acc: 90.00%
Best Validation Accuracy: 92.00%
Training with optimizer RMSprop and batch size 16


Epoch [1/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:11<00:00,  5.41it/s, loss=0.83]


Epoch [1/10] - Train Loss: 0.9129, Train Acc: 68.40%, Val Loss: 0.5313, Val Acc: 80.00%


Epoch [2/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:11<00:00,  5.41it/s, loss=1.87]


Epoch [2/10] - Train Loss: 0.6356, Train Acc: 81.40%, Val Loss: 0.5585, Val Acc: 77.00%


Epoch [3/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:11<00:00,  5.41it/s, loss=0.207]


Epoch [3/10] - Train Loss: 0.4287, Train Acc: 87.30%, Val Loss: 0.3347, Val Acc: 88.00%


Epoch [4/10]: 100%|███████████████████████████████████████████████████████| 63/63 [00:11<00:00,  5.43it/s, loss=0.0499]


Epoch [4/10] - Train Loss: 0.2710, Train Acc: 93.10%, Val Loss: 0.8463, Val Acc: 83.00%


Epoch [5/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:11<00:00,  5.38it/s, loss=0.912]


Epoch [5/10] - Train Loss: 0.2683, Train Acc: 94.80%, Val Loss: 0.7329, Val Acc: 82.00%


Epoch [6/10]: 100%|████████████████████████████████████████████████████████████| 63/63 [00:11<00:00,  5.48it/s, loss=0]


Epoch [6/10] - Train Loss: 0.0169, Train Acc: 99.50%, Val Loss: 0.4963, Val Acc: 91.00%


Epoch [7/10]: 100%|████████████████████████████████████████████████████████████| 63/63 [00:11<00:00,  5.43it/s, loss=0]


Epoch [7/10] - Train Loss: 0.0030, Train Acc: 99.90%, Val Loss: 0.6597, Val Acc: 90.00%


Epoch [8/10]: 100%|████████████████████████████████████████████████████████████| 63/63 [00:11<00:00,  5.47it/s, loss=0]


Epoch [8/10] - Train Loss: 0.0006, Train Acc: 100.00%, Val Loss: 0.7456, Val Acc: 90.00%


Epoch [9/10]: 100%|████████████████████████████████████████████████████████████| 63/63 [00:11<00:00,  5.45it/s, loss=0]


Epoch [9/10] - Train Loss: 0.0000, Train Acc: 100.00%, Val Loss: 0.8145, Val Acc: 90.00%


Epoch [10/10]: 100%|███████████████████████████████████████████████████████████| 63/63 [00:11<00:00,  5.41it/s, loss=0]


Epoch [10/10] - Train Loss: 0.0000, Train Acc: 100.00%, Val Loss: 0.8306, Val Acc: 90.00%
Best Validation Accuracy: 91.00%
Training with optimizer RMSprop and batch size 32


Epoch [1/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:10<00:00,  3.17it/s, loss=0.885]


Epoch [1/10] - Train Loss: 1.0081, Train Acc: 66.50%, Val Loss: 0.5287, Val Acc: 84.00%


Epoch [2/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:09<00:00,  3.22it/s, loss=0.535]


Epoch [2/10] - Train Loss: 0.3586, Train Acc: 87.90%, Val Loss: 0.6707, Val Acc: 72.00%


Epoch [3/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:09<00:00,  3.22it/s, loss=0.19]


Epoch [3/10] - Train Loss: 0.2412, Train Acc: 92.30%, Val Loss: 0.3997, Val Acc: 85.00%


Epoch [4/10]: 100%|█████████████████████████████████████████████████████| 32/32 [00:10<00:00,  3.20it/s, loss=0.000907]


Epoch [4/10] - Train Loss: 0.1114, Train Acc: 96.10%, Val Loss: 0.6261, Val Acc: 89.00%


Epoch [5/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:09<00:00,  3.22it/s, loss=0.141]


Epoch [5/10] - Train Loss: 0.0609, Train Acc: 97.90%, Val Loss: 0.4317, Val Acc: 90.00%


Epoch [6/10]: 100%|██████████████████████████████████████████████████████| 32/32 [00:10<00:00,  3.18it/s, loss=0.00598]


Epoch [6/10] - Train Loss: 0.0155, Train Acc: 99.60%, Val Loss: 0.2410, Val Acc: 93.00%


Epoch [7/10]: 100%|██████████████████████████████████████████████████████| 32/32 [00:10<00:00,  3.18it/s, loss=2.55e-6]


Epoch [7/10] - Train Loss: 0.0093, Train Acc: 99.70%, Val Loss: 0.2675, Val Acc: 94.00%


Epoch [8/10]: 100%|████████████████████████████████████████████████████████████| 32/32 [00:09<00:00,  3.24it/s, loss=0]


Epoch [8/10] - Train Loss: 0.0009, Train Acc: 100.00%, Val Loss: 0.3485, Val Acc: 89.00%


Epoch [9/10]: 100%|██████████████████████████████████████████████████████| 32/32 [00:09<00:00,  3.25it/s, loss=1.49e-8]


Epoch [9/10] - Train Loss: 0.0003, Train Acc: 100.00%, Val Loss: 0.3141, Val Acc: 94.00%


Epoch [10/10]: 100%|███████████████████████████████████████████████████████████| 32/32 [00:09<00:00,  3.22it/s, loss=0]


Epoch [10/10] - Train Loss: 0.0001, Train Acc: 100.00%, Val Loss: 0.3530, Val Acc: 95.00%
Best Validation Accuracy: 95.00%
Training with optimizer AdamW and batch size 8


Epoch [1/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:16<00:00,  7.50it/s, loss=1.16]


Epoch [1/10] - Train Loss: 0.9259, Train Acc: 68.70%, Val Loss: 0.3700, Val Acc: 89.00%


Epoch [2/10]: 100%|█████████████████████████████████████████████████████| 125/125 [00:16<00:00,  7.47it/s, loss=0.0266]


Epoch [2/10] - Train Loss: 0.4413, Train Acc: 87.00%, Val Loss: 0.5461, Val Acc: 80.00%


Epoch [3/10]: 100%|█████████████████████████████████████████████████████| 125/125 [00:16<00:00,  7.45it/s, loss=0.0869]


Epoch [3/10] - Train Loss: 0.3516, Train Acc: 91.30%, Val Loss: 0.8399, Val Acc: 87.00%


Epoch [4/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:16<00:00,  7.49it/s, loss=0.931]


Epoch [4/10] - Train Loss: 0.5727, Train Acc: 88.90%, Val Loss: 2.2312, Val Acc: 61.00%


Epoch [5/10]: 100%|█████████████████████████████████████████████████████| 125/125 [00:16<00:00,  7.41it/s, loss=0.0583]


Epoch [5/10] - Train Loss: 0.5531, Train Acc: 91.10%, Val Loss: 2.2452, Val Acc: 75.00%


Epoch [6/10]: 100%|██████████████████████████████████████████████████████████| 125/125 [00:16<00:00,  7.40it/s, loss=0]


Epoch [6/10] - Train Loss: 0.1067, Train Acc: 97.70%, Val Loss: 0.7936, Val Acc: 87.00%


Epoch [7/10]: 100%|████████████████████████████████████████████████████| 125/125 [00:16<00:00,  7.49it/s, loss=2.77e-5]


Epoch [7/10] - Train Loss: 0.0057, Train Acc: 99.70%, Val Loss: 0.9086, Val Acc: 91.00%


Epoch [8/10]: 100%|███████████████████████████████████████████████████| 125/125 [00:16<00:00,  7.43it/s, loss=0.000568]


Epoch [8/10] - Train Loss: 0.0013, Train Acc: 99.90%, Val Loss: 0.7609, Val Acc: 91.00%


Epoch [9/10]: 100%|██████████████████████████████████████████████████████████| 125/125 [00:16<00:00,  7.40it/s, loss=0]


Epoch [9/10] - Train Loss: 0.0077, Train Acc: 99.90%, Val Loss: 0.8291, Val Acc: 91.00%


Epoch [10/10]: 100%|█████████████████████████████████████████████████████████| 125/125 [00:16<00:00,  7.40it/s, loss=0]


Epoch [10/10] - Train Loss: 0.0000, Train Acc: 100.00%, Val Loss: 0.8695, Val Acc: 88.00%
Best Validation Accuracy: 91.00%
Training with optimizer AdamW and batch size 16


Epoch [1/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:12<00:00,  5.00it/s, loss=0.621]


Epoch [1/10] - Train Loss: 1.1419, Train Acc: 61.00%, Val Loss: 0.4778, Val Acc: 87.00%


Epoch [2/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:12<00:00,  5.02it/s, loss=0.441]


Epoch [2/10] - Train Loss: 0.3946, Train Acc: 87.90%, Val Loss: 0.4201, Val Acc: 89.00%


Epoch [3/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:12<00:00,  5.03it/s, loss=0.049]


Epoch [3/10] - Train Loss: 0.1990, Train Acc: 93.70%, Val Loss: 0.2324, Val Acc: 92.00%


Epoch [4/10]: 100%|██████████████████████████████████████████████████████| 63/63 [00:12<00:00,  5.03it/s, loss=0.00332]


Epoch [4/10] - Train Loss: 0.2106, Train Acc: 95.80%, Val Loss: 1.2452, Val Acc: 77.00%


Epoch [5/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:12<00:00,  4.99it/s, loss=1.25]


Epoch [5/10] - Train Loss: 0.3159, Train Acc: 92.60%, Val Loss: 0.4879, Val Acc: 85.00%


Epoch [6/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:12<00:00,  5.02it/s, loss=0.656]


Epoch [6/10] - Train Loss: 0.0800, Train Acc: 98.20%, Val Loss: 0.5382, Val Acc: 92.00%


Epoch [7/10]: 100%|██████████████████████████████████████████████████████| 63/63 [00:12<00:00,  5.03it/s, loss=1.04e-7]


Epoch [7/10] - Train Loss: 0.0193, Train Acc: 99.50%, Val Loss: 0.4318, Val Acc: 93.00%


Epoch [8/10]: 100%|██████████████████████████████████████████████████████| 63/63 [00:12<00:00,  4.90it/s, loss=1.49e-8]


Epoch [8/10] - Train Loss: 0.0270, Train Acc: 99.70%, Val Loss: 0.7847, Val Acc: 91.00%


Epoch [9/10]: 100%|██████████████████████████████████████████████████████| 63/63 [00:12<00:00,  4.89it/s, loss=1.39e-6]


Epoch [9/10] - Train Loss: 0.0132, Train Acc: 99.90%, Val Loss: 0.8123, Val Acc: 92.00%


Epoch [10/10]: 100%|███████████████████████████████████████████████████████████| 63/63 [00:12<00:00,  4.93it/s, loss=0]


Epoch [10/10] - Train Loss: 0.0059, Train Acc: 99.90%, Val Loss: 0.8977, Val Acc: 90.00%
Best Validation Accuracy: 93.00%
Training with optimizer AdamW and batch size 32


Epoch [1/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:10<00:00,  2.99it/s, loss=1.19]


Epoch [1/10] - Train Loss: 1.2235, Train Acc: 57.20%, Val Loss: 0.4513, Val Acc: 88.00%


Epoch [2/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:10<00:00,  3.03it/s, loss=0.546]


Epoch [2/10] - Train Loss: 0.3858, Train Acc: 86.60%, Val Loss: 0.3125, Val Acc: 85.00%


Epoch [3/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:10<00:00,  3.04it/s, loss=0.334]


Epoch [3/10] - Train Loss: 0.2414, Train Acc: 91.30%, Val Loss: 0.3024, Val Acc: 88.00%


Epoch [4/10]: 100%|███████████████████████████████████████████████████████| 32/32 [00:10<00:00,  3.08it/s, loss=0.0101]


Epoch [4/10] - Train Loss: 0.1587, Train Acc: 95.50%, Val Loss: 0.2314, Val Acc: 89.00%


Epoch [5/10]: 100%|██████████████████████████████████████████████████████| 32/32 [00:10<00:00,  3.07it/s, loss=0.00386]


Epoch [5/10] - Train Loss: 0.0503, Train Acc: 98.10%, Val Loss: 0.2674, Val Acc: 89.00%


Epoch [6/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:21<00:00,  1.50it/s, loss=0.016]


Epoch [6/10] - Train Loss: 0.0284, Train Acc: 98.90%, Val Loss: 0.2187, Val Acc: 94.00%


Epoch [7/10]: 100%|██████████████████████████████████████████████████████| 32/32 [00:13<00:00,  2.31it/s, loss=5.96e-7]


Epoch [7/10] - Train Loss: 0.0023, Train Acc: 99.90%, Val Loss: 0.2877, Val Acc: 95.00%


Epoch [8/10]: 100%|████████████████████████████████████████████████████████████| 32/32 [00:11<00:00,  2.78it/s, loss=0]


Epoch [8/10] - Train Loss: 0.0084, Train Acc: 99.80%, Val Loss: 0.2965, Val Acc: 93.00%


Epoch [9/10]: 100%|██████████████████████████████████████████████████████| 32/32 [00:19<00:00,  1.68it/s, loss=9.34e-5]


Epoch [9/10] - Train Loss: 0.0064, Train Acc: 99.90%, Val Loss: 0.3826, Val Acc: 94.00%


Epoch [10/10]: 100%|███████████████████████████████████████████████████████████| 32/32 [00:21<00:00,  1.48it/s, loss=0]


Epoch [10/10] - Train Loss: 0.0010, Train Acc: 99.90%, Val Loss: 0.5141, Val Acc: 93.00%
Best Validation Accuracy: 95.00%
Training with optimizer Adamax and batch size 8


Epoch [1/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:33<00:00,  3.72it/s, loss=1.02]


Epoch [1/10] - Train Loss: 1.1041, Train Acc: 62.70%, Val Loss: 0.3673, Val Acc: 89.00%


Epoch [2/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:33<00:00,  3.75it/s, loss=0.258]


Epoch [2/10] - Train Loss: 0.4005, Train Acc: 88.00%, Val Loss: 0.3112, Val Acc: 88.00%


Epoch [3/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:32<00:00,  3.79it/s, loss=0.012]


Epoch [3/10] - Train Loss: 0.2021, Train Acc: 93.50%, Val Loss: 0.5129, Val Acc: 86.00%


Epoch [4/10]: 100%|████████████████████████████████████████████████████| 125/125 [00:32<00:00,  3.88it/s, loss=0.00434]


Epoch [4/10] - Train Loss: 0.2051, Train Acc: 96.40%, Val Loss: 0.5217, Val Acc: 89.00%


Epoch [5/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:31<00:00,  3.94it/s, loss=1.07]


Epoch [5/10] - Train Loss: 0.2533, Train Acc: 96.70%, Val Loss: 0.5116, Val Acc: 92.00%


Epoch [6/10]: 100%|██████████████████████████████████████████████████████████| 125/125 [00:30<00:00,  4.06it/s, loss=0]


Epoch [6/10] - Train Loss: 0.0431, Train Acc: 99.00%, Val Loss: 0.3714, Val Acc: 94.00%


Epoch [7/10]: 100%|██████████████████████████████████████████████████████████| 125/125 [00:30<00:00,  4.15it/s, loss=0]


Epoch [7/10] - Train Loss: 0.0241, Train Acc: 99.50%, Val Loss: 0.6058, Val Acc: 91.00%


Epoch [8/10]: 100%|████████████████████████████████████████████████████| 125/125 [00:30<00:00,  4.12it/s, loss=2.98e-8]


Epoch [8/10] - Train Loss: 0.0133, Train Acc: 99.60%, Val Loss: 0.4785, Val Acc: 93.00%


Epoch [9/10]: 100%|████████████████████████████████████████████████████| 125/125 [00:24<00:00,  5.15it/s, loss=0.00044]


Epoch [9/10] - Train Loss: 0.0295, Train Acc: 99.60%, Val Loss: 0.5644, Val Acc: 92.00%


Epoch [10/10]: 100%|█████████████████████████████████████████████████████████| 125/125 [00:29<00:00,  4.21it/s, loss=0]


Epoch [10/10] - Train Loss: 0.0252, Train Acc: 99.60%, Val Loss: 0.4329, Val Acc: 92.00%
Best Validation Accuracy: 94.00%
Training with optimizer Adamax and batch size 16


Epoch [1/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:20<00:00,  3.12it/s, loss=0.137]


Epoch [1/10] - Train Loss: 1.2336, Train Acc: 56.80%, Val Loss: 0.3869, Val Acc: 87.00%


Epoch [2/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:20<00:00,  3.07it/s, loss=0.211]


Epoch [2/10] - Train Loss: 0.3389, Train Acc: 88.30%, Val Loss: 0.2330, Val Acc: 89.00%


Epoch [3/10]: 100%|██████████████████████████████████████████████████████| 63/63 [00:20<00:00,  3.08it/s, loss=0.00683]


Epoch [3/10] - Train Loss: 0.1944, Train Acc: 93.80%, Val Loss: 0.1854, Val Acc: 94.00%


Epoch [4/10]: 100%|███████████████████████████████████████████████████████| 63/63 [00:20<00:00,  3.04it/s, loss=0.0104]


Epoch [4/10] - Train Loss: 0.1377, Train Acc: 95.90%, Val Loss: 0.2257, Val Acc: 89.00%


Epoch [5/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:20<00:00,  3.02it/s, loss=0.136]


Epoch [5/10] - Train Loss: 0.0792, Train Acc: 97.40%, Val Loss: 0.3428, Val Acc: 92.00%


Epoch [6/10]: 100%|██████████████████████████████████████████████████████| 63/63 [00:20<00:00,  3.02it/s, loss=5.66e-7]


Epoch [6/10] - Train Loss: 0.0534, Train Acc: 98.30%, Val Loss: 0.2116, Val Acc: 91.00%


Epoch [7/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:21<00:00,  2.95it/s, loss=0.304]


Epoch [7/10] - Train Loss: 0.0372, Train Acc: 99.10%, Val Loss: 0.2268, Val Acc: 94.00%


Epoch [8/10]: 100%|██████████████████████████████████████████████████████| 63/63 [00:21<00:00,  2.90it/s, loss=1.04e-7]


Epoch [8/10] - Train Loss: 0.0407, Train Acc: 99.10%, Val Loss: 0.2565, Val Acc: 92.00%


Epoch [9/10]: 100%|███████████████████████████████████████████████████████| 63/63 [00:23<00:00,  2.70it/s, loss=0.0773]


Epoch [9/10] - Train Loss: 0.0360, Train Acc: 98.90%, Val Loss: 0.2378, Val Acc: 92.00%


Epoch [10/10]: 100%|███████████████████████████████████████████████████████████| 63/63 [00:20<00:00,  3.00it/s, loss=0]


Epoch [10/10] - Train Loss: 0.0140, Train Acc: 99.40%, Val Loss: 0.2215, Val Acc: 92.00%
Best Validation Accuracy: 94.00%
Training with optimizer Adamax and batch size 32


Epoch [1/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:11<00:00,  2.75it/s, loss=0.635]


Epoch [1/10] - Train Loss: 1.5173, Train Acc: 47.70%, Val Loss: 0.6617, Val Acc: 82.00%


Epoch [2/10]: 100%|███████████████████████████████████████████████████████| 32/32 [00:11<00:00,  2.71it/s, loss=0.0977]


Epoch [2/10] - Train Loss: 0.4397, Train Acc: 84.70%, Val Loss: 0.4020, Val Acc: 87.00%


Epoch [3/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:12<00:00,  2.65it/s, loss=0.356]


Epoch [3/10] - Train Loss: 0.2347, Train Acc: 93.00%, Val Loss: 0.2234, Val Acc: 94.00%


Epoch [4/10]: 100%|███████████████████████████████████████████████████████| 32/32 [00:12<00:00,  2.59it/s, loss=0.0195]


Epoch [4/10] - Train Loss: 0.1213, Train Acc: 95.90%, Val Loss: 0.2786, Val Acc: 92.00%


Epoch [5/10]: 100%|███████████████████████████████████████████████████████| 32/32 [00:12<00:00,  2.52it/s, loss=0.0277]


Epoch [5/10] - Train Loss: 0.0938, Train Acc: 96.80%, Val Loss: 0.2366, Val Acc: 89.00%


Epoch [6/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:12<00:00,  2.53it/s, loss=0.018]


Epoch [6/10] - Train Loss: 0.0529, Train Acc: 98.40%, Val Loss: 0.1940, Val Acc: 93.00%


Epoch [7/10]: 100%|██████████████████████████████████████████████████████| 32/32 [00:12<00:00,  2.48it/s, loss=0.00853]


Epoch [7/10] - Train Loss: 0.0342, Train Acc: 98.70%, Val Loss: 0.1682, Val Acc: 94.00%


Epoch [8/10]: 100%|██████████████████████████████████████████████████████| 32/32 [00:13<00:00,  2.45it/s, loss=2.95e-6]


Epoch [8/10] - Train Loss: 0.0193, Train Acc: 99.30%, Val Loss: 0.1861, Val Acc: 93.00%


Epoch [9/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:20<00:00,  1.55it/s, loss=0.01]


Epoch [9/10] - Train Loss: 0.0348, Train Acc: 99.10%, Val Loss: 0.1936, Val Acc: 94.00%


Epoch [10/10]: 100%|██████████████████████████████████████████████████████| 32/32 [00:20<00:00,  1.54it/s, loss=0.0164]


Epoch [10/10] - Train Loss: 0.0124, Train Acc: 99.70%, Val Loss: 0.1989, Val Acc: 94.00%
Best Validation Accuracy: 94.00%
Training with optimizer RAdam and batch size 8


Epoch [1/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:33<00:00,  3.70it/s, loss=1.15]


Epoch [1/10] - Train Loss: 2.0888, Train Acc: 27.90%, Val Loss: 1.1581, Val Acc: 69.00%


Epoch [2/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:29<00:00,  4.20it/s, loss=0.122]


Epoch [2/10] - Train Loss: 0.6729, Train Acc: 77.40%, Val Loss: 0.5253, Val Acc: 86.00%


Epoch [3/10]: 100%|████████████████████████████████████████████████████| 125/125 [00:29<00:00,  4.20it/s, loss=0.00151]


Epoch [3/10] - Train Loss: 0.3841, Train Acc: 87.30%, Val Loss: 0.3814, Val Acc: 89.00%


Epoch [4/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:32<00:00,  3.85it/s, loss=0.117]


Epoch [4/10] - Train Loss: 0.3814, Train Acc: 91.10%, Val Loss: 0.6024, Val Acc: 86.00%


Epoch [5/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:34<00:00,  3.58it/s, loss=1.82]


Epoch [5/10] - Train Loss: 0.4155, Train Acc: 90.90%, Val Loss: 0.6365, Val Acc: 84.00%


Epoch [6/10]: 100%|████████████████████████████████████████████████████| 125/125 [00:30<00:00,  4.13it/s, loss=0.00013]


Epoch [6/10] - Train Loss: 0.0862, Train Acc: 98.10%, Val Loss: 0.5533, Val Acc: 86.00%


Epoch [7/10]: 100%|██████████████████████████████████████████████████████████| 125/125 [00:26<00:00,  4.75it/s, loss=0]


Epoch [7/10] - Train Loss: 0.0339, Train Acc: 99.20%, Val Loss: 0.4752, Val Acc: 90.00%


Epoch [8/10]: 100%|████████████████████████████████████████████████████| 125/125 [00:30<00:00,  4.15it/s, loss=8.74e-5]


Epoch [8/10] - Train Loss: 0.0162, Train Acc: 99.60%, Val Loss: 0.6352, Val Acc: 90.00%


Epoch [9/10]: 100%|██████████████████████████████████████████████████████████| 125/125 [00:31<00:00,  4.02it/s, loss=0]


Epoch [9/10] - Train Loss: 0.0123, Train Acc: 99.80%, Val Loss: 0.5899, Val Acc: 91.00%


Epoch [10/10]: 100%|███████████████████████████████████████████████████| 125/125 [00:30<00:00,  4.10it/s, loss=1.43e-5]


Epoch [10/10] - Train Loss: 0.0052, Train Acc: 99.70%, Val Loss: 0.6858, Val Acc: 89.00%
Best Validation Accuracy: 91.00%
Training with optimizer RAdam and batch size 16


Epoch [1/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:20<00:00,  3.14it/s, loss=2.35]


Epoch [1/10] - Train Loss: 2.4047, Train Acc: 14.70%, Val Loss: 2.0765, Val Acc: 33.00%


Epoch [2/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:18<00:00,  3.39it/s, loss=1.26]


Epoch [2/10] - Train Loss: 1.6603, Train Acc: 45.10%, Val Loss: 1.0915, Val Acc: 80.00%


Epoch [3/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:18<00:00,  3.38it/s, loss=0.557]


Epoch [3/10] - Train Loss: 0.7555, Train Acc: 75.20%, Val Loss: 0.4128, Val Acc: 89.00%


Epoch [4/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:21<00:00,  2.95it/s, loss=0.148]


Epoch [4/10] - Train Loss: 0.3981, Train Acc: 86.60%, Val Loss: 0.2667, Val Acc: 90.00%


Epoch [5/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:19<00:00,  3.17it/s, loss=0.659]


Epoch [5/10] - Train Loss: 0.3252, Train Acc: 88.90%, Val Loss: 0.2321, Val Acc: 91.00%


Epoch [6/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:23<00:00,  2.69it/s, loss=0.152]


Epoch [6/10] - Train Loss: 0.1385, Train Acc: 95.40%, Val Loss: 0.1796, Val Acc: 93.00%


Epoch [7/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:22<00:00,  2.77it/s, loss=0.248]


Epoch [7/10] - Train Loss: 0.1132, Train Acc: 95.90%, Val Loss: 0.1778, Val Acc: 91.00%


Epoch [8/10]: 100%|██████████████████████████████████████████████████████| 63/63 [00:18<00:00,  3.40it/s, loss=0.00673]


Epoch [8/10] - Train Loss: 0.0684, Train Acc: 97.90%, Val Loss: 0.1825, Val Acc: 92.00%


Epoch [9/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:20<00:00,  3.10it/s, loss=0.852]


Epoch [9/10] - Train Loss: 0.0829, Train Acc: 97.40%, Val Loss: 0.2161, Val Acc: 91.00%


Epoch [10/10]: 100%|███████████████████████████████████████████████████████| 63/63 [00:24<00:00,  2.62it/s, loss=0.312]


Epoch [10/10] - Train Loss: 0.0635, Train Acc: 98.20%, Val Loss: 0.2437, Val Acc: 92.00%
Best Validation Accuracy: 93.00%
Training with optimizer RAdam and batch size 32


Epoch [1/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:16<00:00,  1.98it/s, loss=2.29]


Epoch [1/10] - Train Loss: 2.4240, Train Acc: 14.50%, Val Loss: 2.1699, Val Acc: 23.00%


Epoch [2/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:14<00:00,  2.17it/s, loss=1.95]


Epoch [2/10] - Train Loss: 2.1617, Train Acc: 21.30%, Val Loss: 1.8326, Val Acc: 55.00%


Epoch [3/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:12<00:00,  2.52it/s, loss=0.904]


Epoch [3/10] - Train Loss: 1.5708, Train Acc: 50.50%, Val Loss: 1.1998, Val Acc: 77.00%


Epoch [4/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:22<00:00,  1.45it/s, loss=0.917]


Epoch [4/10] - Train Loss: 0.8971, Train Acc: 71.90%, Val Loss: 0.5555, Val Acc: 85.00%


Epoch [5/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:22<00:00,  1.42it/s, loss=0.367]


Epoch [5/10] - Train Loss: 0.5174, Train Acc: 82.70%, Val Loss: 0.3475, Val Acc: 89.00%


Epoch [6/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:17<00:00,  1.86it/s, loss=0.314]


Epoch [6/10] - Train Loss: 0.4359, Train Acc: 85.90%, Val Loss: 0.3284, Val Acc: 90.00%


Epoch [7/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:13<00:00,  2.34it/s, loss=0.492]


Epoch [7/10] - Train Loss: 0.4029, Train Acc: 87.40%, Val Loss: 0.3002, Val Acc: 91.00%


Epoch [8/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:19<00:00,  1.63it/s, loss=0.284]


Epoch [8/10] - Train Loss: 0.3567, Train Acc: 88.40%, Val Loss: 0.2904, Val Acc: 90.00%


Epoch [9/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:22<00:00,  1.41it/s, loss=0.119]


Epoch [9/10] - Train Loss: 0.3487, Train Acc: 87.90%, Val Loss: 0.2790, Val Acc: 90.00%


Epoch [10/10]: 100%|███████████████████████████████████████████████████████| 32/32 [00:19<00:00,  1.61it/s, loss=0.581]


Epoch [10/10] - Train Loss: 0.3250, Train Acc: 89.60%, Val Loss: 0.2602, Val Acc: 91.00%
Best Validation Accuracy: 91.00%
Training with optimizer NAdam and batch size 8


Epoch [1/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:35<00:00,  3.51it/s, loss=0.807]


Epoch [1/10] - Train Loss: 1.0043, Train Acc: 66.90%, Val Loss: 0.5277, Val Acc: 81.00%


Epoch [2/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:37<00:00,  3.37it/s, loss=0.173]


Epoch [2/10] - Train Loss: 0.5758, Train Acc: 84.70%, Val Loss: 0.5862, Val Acc: 83.00%


Epoch [3/10]: 100%|█████████████████████████████████████████████████████| 125/125 [00:34<00:00,  3.60it/s, loss=0.0018]


Epoch [3/10] - Train Loss: 0.4650, Train Acc: 89.40%, Val Loss: 0.7090, Val Acc: 83.00%


Epoch [4/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:28<00:00,  4.33it/s, loss=0.803]


Epoch [4/10] - Train Loss: 0.5509, Train Acc: 93.50%, Val Loss: 2.2833, Val Acc: 76.00%


Epoch [5/10]: 100%|███████████████████████████████████████████████████| 125/125 [00:29<00:00,  4.23it/s, loss=0.000697]


Epoch [5/10] - Train Loss: 0.4238, Train Acc: 94.80%, Val Loss: 5.5858, Val Acc: 70.00%


Epoch [6/10]: 100%|██████████████████████████████████████████████████████████| 125/125 [00:32<00:00,  3.87it/s, loss=0]


Epoch [6/10] - Train Loss: 0.1194, Train Acc: 98.60%, Val Loss: 0.7922, Val Acc: 90.00%


Epoch [7/10]: 100%|██████████████████████████████████████████████████████████| 125/125 [00:29<00:00,  4.17it/s, loss=0]


Epoch [7/10] - Train Loss: 0.0017, Train Acc: 99.90%, Val Loss: 1.0170, Val Acc: 90.00%


Epoch [8/10]: 100%|██████████████████████████████████████████████████████████| 125/125 [00:29<00:00,  4.26it/s, loss=0]


Epoch [8/10] - Train Loss: 0.0242, Train Acc: 99.80%, Val Loss: 1.0886, Val Acc: 86.00%


Epoch [9/10]: 100%|██████████████████████████████████████████████████████████| 125/125 [00:31<00:00,  4.00it/s, loss=0]


Epoch [9/10] - Train Loss: 0.0000, Train Acc: 100.00%, Val Loss: 1.0789, Val Acc: 86.00%


Epoch [10/10]: 100%|█████████████████████████████████████████████████████████| 125/125 [00:29<00:00,  4.20it/s, loss=0]


Epoch [10/10] - Train Loss: 0.0000, Train Acc: 100.00%, Val Loss: 1.3289, Val Acc: 83.00%
Best Validation Accuracy: 90.00%
Training with optimizer NAdam and batch size 16


Epoch [1/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:23<00:00,  2.70it/s, loss=0.72]


Epoch [1/10] - Train Loss: 1.0234, Train Acc: 64.70%, Val Loss: 0.3718, Val Acc: 84.00%


Epoch [2/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:20<00:00,  3.02it/s, loss=0.503]


Epoch [2/10] - Train Loss: 0.3727, Train Acc: 87.50%, Val Loss: 0.3958, Val Acc: 82.00%


Epoch [3/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:24<00:00,  2.54it/s, loss=0.165]


Epoch [3/10] - Train Loss: 0.3015, Train Acc: 91.40%, Val Loss: 0.6630, Val Acc: 80.00%


Epoch [4/10]: 100%|███████████████████████████████████████████████████████| 63/63 [00:24<00:00,  2.58it/s, loss=0.0637]


Epoch [4/10] - Train Loss: 0.2492, Train Acc: 94.30%, Val Loss: 0.3385, Val Acc: 89.00%


Epoch [5/10]: 100%|███████████████████████████████████████████████████████| 63/63 [00:18<00:00,  3.34it/s, loss=0.0417]


Epoch [5/10] - Train Loss: 0.1961, Train Acc: 95.40%, Val Loss: 0.6396, Val Acc: 89.00%


Epoch [6/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:17<00:00,  3.55it/s, loss=0.76]


Epoch [6/10] - Train Loss: 0.0419, Train Acc: 99.00%, Val Loss: 0.4822, Val Acc: 92.00%


Epoch [7/10]: 100%|█████████████████████████████████████████████████████| 63/63 [00:22<00:00,  2.78it/s, loss=0.000126]


Epoch [7/10] - Train Loss: 0.0337, Train Acc: 99.50%, Val Loss: 0.5946, Val Acc: 94.00%


Epoch [8/10]: 100%|████████████████████████████████████████████████████████████| 63/63 [00:24<00:00,  2.53it/s, loss=0]


Epoch [8/10] - Train Loss: 0.0061, Train Acc: 99.70%, Val Loss: 0.5739, Val Acc: 93.00%


Epoch [9/10]: 100%|████████████████████████████████████████████████████████████| 63/63 [00:22<00:00,  2.76it/s, loss=0]


Epoch [9/10] - Train Loss: 0.0018, Train Acc: 99.90%, Val Loss: 0.5615, Val Acc: 90.00%


Epoch [10/10]: 100%|███████████████████████████████████████████████████████████| 63/63 [00:17<00:00,  3.51it/s, loss=0]


Epoch [10/10] - Train Loss: 0.0010, Train Acc: 99.90%, Val Loss: 0.5687, Val Acc: 92.00%
Best Validation Accuracy: 94.00%
Training with optimizer NAdam and batch size 32


Epoch [1/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:19<00:00,  1.62it/s, loss=0.483]


Epoch [1/10] - Train Loss: 1.3407, Train Acc: 52.10%, Val Loss: 0.5017, Val Acc: 79.00%


Epoch [2/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:22<00:00,  1.43it/s, loss=0.265]


Epoch [2/10] - Train Loss: 0.3536, Train Acc: 89.00%, Val Loss: 0.2555, Val Acc: 91.00%


Epoch [3/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:16<00:00,  1.94it/s, loss=0.173]


Epoch [3/10] - Train Loss: 0.1767, Train Acc: 93.80%, Val Loss: 0.1898, Val Acc: 91.00%


Epoch [4/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:16<00:00,  1.99it/s, loss=0.129]


Epoch [4/10] - Train Loss: 0.1068, Train Acc: 97.10%, Val Loss: 0.3392, Val Acc: 81.00%


Epoch [5/10]: 100%|█████████████████████████████████████████████████████| 32/32 [00:22<00:00,  1.45it/s, loss=0.000403]


Epoch [5/10] - Train Loss: 0.0743, Train Acc: 97.70%, Val Loss: 0.4790, Val Acc: 83.00%


Epoch [6/10]: 100%|█████████████████████████████████████████████████████| 32/32 [00:20<00:00,  1.57it/s, loss=0.000442]


Epoch [6/10] - Train Loss: 0.0515, Train Acc: 98.50%, Val Loss: 0.2710, Val Acc: 90.00%


Epoch [7/10]: 100%|██████████████████████████████████████████████████████| 32/32 [00:12<00:00,  2.64it/s, loss=1.34e-7]


Epoch [7/10] - Train Loss: 0.0075, Train Acc: 99.80%, Val Loss: 0.3359, Val Acc: 90.00%


Epoch [8/10]: 100%|██████████████████████████████████████████████████████| 32/32 [00:21<00:00,  1.52it/s, loss=2.88e-6]


Epoch [8/10] - Train Loss: 0.0036, Train Acc: 99.80%, Val Loss: 0.3899, Val Acc: 91.00%


Epoch [9/10]: 100%|██████████████████████████████████████████████████████| 32/32 [00:23<00:00,  1.39it/s, loss=8.54e-6]


Epoch [9/10] - Train Loss: 0.0005, Train Acc: 100.00%, Val Loss: 0.4181, Val Acc: 91.00%


Epoch [10/10]: 100%|███████████████████████████████████████████████████████████| 32/32 [00:15<00:00,  2.03it/s, loss=0]


Epoch [10/10] - Train Loss: 0.0145, Train Acc: 99.70%, Val Loss: 1.1992, Val Acc: 86.00%
Best Validation Accuracy: 91.00%
