In [15]:
import torch

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

## Custom CNN Network using VGG Blocks

In [16]:
import torch.nn as nn
import torch.nn.functional as F

class CustomVGG(nn.Module):
    def __init__(self, num_classes=10):
        super(CustomVGG, self).__init__()
        
        # VGG blocks
        self.block1 = self._make_vgg_block(3, 64, 2)
        self.block2 = self._make_vgg_block(64, 128, 2)
        self.block3 = self._make_vgg_block(128, 256, 3)
        self.block4 = self._make_vgg_block(256, 512, 3)
        self.block5 = self._make_vgg_block(512, 512, 3)
        
        # Fully connected layers
        self.fc1 = nn.Linear(512 * 7 * 7, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, num_classes)
        
        # Dropout layers
        self.dropout = nn.Dropout(0.5)
        
    def _make_vgg_block(self, in_channels, out_channels, num_convs):
        layers = []
        for _ in range(num_convs):
            layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
            layers.append(nn.ReLU(inplace=True))
            in_channels = out_channels
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*layers)
    
    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)
        
        x = torch.flatten(x, 1)
        
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.fc3(x)
        
        return x

## Data Loader

In [17]:
import os
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

data_dir = 'ImageNet/'
def get_data_loaders(data_dir, batch_size):
    train_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    val_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    train_dataset = datasets.ImageFolder(os.path.join(data_dir, 'train'), transform=train_transforms)
    val_dataset = datasets.ImageFolder(os.path.join(data_dir, 'val'), transform=val_transforms)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
    return train_loader, val_loader

## Optimizers

In [18]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

# Define a function to load ResNet50 with modified output layer
def load_resnet50(num_classes):
    # Load pre-trained ResNet50
    model =  models.resnet50(weights='ResNet50_Weights.DEFAULT')
    # Modify the last fully connected layer to output 10 classes
    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, num_classes)
    return model

# # Define a function to load VGG16 with modified output layer
# def load_vgg16(num_classes):
#     # Load pre-trained VGG16
#     model = models.vgg16(weights='VGG16_Weights.DEFAULT')
#     # Modify the last fully connected layer to output 10 classes
#     num_features = model.classifier[6].in_features
#     model.classifier[6] = nn.Linear(num_features, num_classes)
#     return model

In [19]:
import torch.optim as optim

# Function to create and return the model and optimizer
def create_model_optimizer(optimizer_name, lr):
    # model = CustomVGG(num_classes=10).to(device)
    # model = load_vgg16(num_classes=10).to(device)
    model = load_resnet50(num_classes=10).to(device)
    optimizers = {
        'SGD': optim.SGD(model.parameters(), lr=lr, momentum=0.9),
        'Adam': optim.Adam(model.parameters(), lr=lr),
        'RMSprop': optim.RMSprop(model.parameters(), lr=lr),
        'AdamW': optim.AdamW(model.parameters(), lr=lr),
        'Adamax': optim.Adamax(model.parameters(), lr=lr),
        'SparseAdam': optim.SparseAdam(model.parameters(), lr=lr),
        'RAdam': optim.RAdam(model.parameters(), lr=lr),
        'NAdam': optim.NAdam(model.parameters(), lr=lr)
    }
    optimizer = optimizers[optimizer_name]
    return model, optimizer

# Training Loop

In [20]:
import pandas as pd
from tqdm import tqdm
from torch.cuda.amp import GradScaler, autocast

# Function to train and evaluate the model
def train_and_evaluate(optimizer_name, lr, train_loader, val_loader):
    model, optimizer = create_model_optimizer(optimizer_name, lr)
    criterion = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    scaler = GradScaler()

    best_val_acc = 0
    best_model_wts = None
    patience = 5
    trigger_times = 0

    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []

    # Initialize lists to store epoch-wise data
    epoch_data = {
        'Epoch': [],
        'Train Loss': [],
        'Train Acc': [],
        'Val Loss': [],
        'Val Acc': []
    }

    for epoch in range(num_epochs):
        model.train()
        running_loss, correct, total = 0.0, 0, 0

        loop = tqdm(train_loader, desc=f'Epoch [{epoch+1}/{num_epochs}]', leave=True)
        for inputs, labels in loop:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            with autocast():
                outputs = model(inputs)
                loss = criterion(outputs, labels)
            
            scaler.scale(loss).backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            loop.set_postfix(loss=loss.item())
        
        train_loss = running_loss / len(train_loader)
        train_accuracy = 100 * correct / total
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)

        val_loss, val_correct, val_total = 0.0, 0, 0
        model.eval()
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, predicted = torch.max(outputs.data, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()
        
        val_loss /= len(val_loader)
        val_accuracy = 100 * val_correct / val_total
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)

        scheduler.step()

        # Add epoch-wise data to the dictionary
        epoch_data['Epoch'].append(epoch + 1)
        epoch_data['Train Loss'].append(train_loss)
        epoch_data['Train Acc'].append(train_accuracy)
        epoch_data['Val Loss'].append(val_loss)
        epoch_data['Val Acc'].append(val_accuracy)

        print(f'Epoch [{epoch+1}/{num_epochs}] - Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}%, Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.2f}%')

        if val_accuracy > best_val_acc:
            best_val_acc = val_accuracy
            best_model_wts = model.state_dict().copy()
            trigger_times = 0
        else:
            trigger_times += 1

        if trigger_times >= patience:
            print('Early stopping triggered')
            break

    print(f'Best Validation Accuracy: {best_val_acc:.2f}%')
    if best_model_wts:
        model.load_state_dict(best_model_wts)
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'best_val_acc': best_val_acc
    }, f'model_{optimizer_name}_batch_size_{batch_size}.pth')

    # Convert epoch-wise data dictionary to DataFrame
    df = pd.DataFrame(epoch_data)

    # Save DataFrame to CSV file
    df.to_csv(f'epoch_data_{optimizer_name}_batch_size_{batch_size}.csv', index=False)

    return train_losses, val_losses, train_accuracies, val_accuracies

In [21]:
# List of optimizers to be used
# optimizers_list = ['SGD', 'Adam', 'RMSprop', 'AdamW', 'Adamax', 'SparseAdam', 'RAdam', 'NAdam']
optimizers_list = ['SGD', 'Adam', 'RMSprop', 'AdamW', 'Adamax', 'RAdam', 'NAdam']
# optimizers_list = ['RAdam', 'NAdam']

# List of batch sizes to be used
batch_sizes = [8, 16, 32]
lr = 0.0001
num_epochs = 10

# Train and evaluate for different optimizers and batch sizes
loss_dict, acc_dict = {}, {}

for optimizer_name in optimizers_list:
    for batch_size in batch_sizes:
        print(f'Training with optimizer {optimizer_name} and batch size {batch_size}')
        train_loader, val_loader = get_data_loaders(data_dir, batch_size)
        train_losses, val_losses, train_accuracies, val_accuracies = train_and_evaluate(optimizer_name, lr, train_loader, val_loader)
        key = f'{optimizer_name}_bs{batch_size}'
        loss_dict[key] = {'train': train_losses, 'val': val_losses}
        acc_dict[key] = {'train': train_accuracies, 'val': val_accuracies}

Training with optimizer SGD and batch size 8


Epoch [1/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:07<00:00, 16.06it/s, loss=2.32]


Epoch [1/10] - Train Loss: 2.3060, Train Acc: 9.10%, Val Loss: 2.3090, Val Acc: 6.00%


Epoch [2/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:06<00:00, 19.25it/s, loss=2.32]


Epoch [2/10] - Train Loss: 2.3060, Train Acc: 9.00%, Val Loss: 2.3108, Val Acc: 5.00%


Epoch [3/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:06<00:00, 18.97it/s, loss=2.21]


Epoch [3/10] - Train Loss: 2.3040, Train Acc: 8.10%, Val Loss: 2.3123, Val Acc: 8.00%


Epoch [4/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:06<00:00, 19.01it/s, loss=2.32]


Epoch [4/10] - Train Loss: 2.3009, Train Acc: 9.50%, Val Loss: 2.3151, Val Acc: 5.00%


Epoch [5/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:06<00:00, 18.88it/s, loss=2.38]


Epoch [5/10] - Train Loss: 2.3021, Train Acc: 8.90%, Val Loss: 2.3101, Val Acc: 6.00%


Epoch [6/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:06<00:00, 19.52it/s, loss=2.31]


Epoch [6/10] - Train Loss: 2.3028, Train Acc: 8.80%, Val Loss: 2.3169, Val Acc: 7.00%


Epoch [7/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:06<00:00, 19.15it/s, loss=2.34]


Epoch [7/10] - Train Loss: 2.3023, Train Acc: 9.90%, Val Loss: 2.3128, Val Acc: 5.00%


Epoch [8/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:06<00:00, 19.05it/s, loss=2.32]


Epoch [8/10] - Train Loss: 2.3011, Train Acc: 9.80%, Val Loss: 2.3113, Val Acc: 4.00%
Early stopping triggered
Best Validation Accuracy: 8.00%
Training with optimizer SGD and batch size 16


Epoch [1/10]: 100%|██████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.43it/s, loss=2.3]


Epoch [1/10] - Train Loss: 2.3008, Train Acc: 8.50%, Val Loss: 2.2777, Val Acc: 13.00%


Epoch [2/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:05<00:00, 10.58it/s, loss=2.27]


Epoch [2/10] - Train Loss: 2.3016, Train Acc: 9.30%, Val Loss: 2.2786, Val Acc: 9.00%


Epoch [3/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.46it/s, loss=2.37]


Epoch [3/10] - Train Loss: 2.3025, Train Acc: 10.00%, Val Loss: 2.2829, Val Acc: 13.00%


Epoch [4/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:05<00:00, 10.83it/s, loss=2.24]


Epoch [4/10] - Train Loss: 2.3033, Train Acc: 8.90%, Val Loss: 2.2718, Val Acc: 12.00%


Epoch [5/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:05<00:00, 10.85it/s, loss=2.33]


Epoch [5/10] - Train Loss: 2.3067, Train Acc: 7.80%, Val Loss: 2.2705, Val Acc: 13.00%


Epoch [6/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:05<00:00, 10.89it/s, loss=2.34]


Epoch [6/10] - Train Loss: 2.2994, Train Acc: 9.50%, Val Loss: 2.2748, Val Acc: 14.00%


Epoch [7/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:05<00:00, 10.89it/s, loss=2.28]


Epoch [7/10] - Train Loss: 2.3029, Train Acc: 8.70%, Val Loss: 2.2785, Val Acc: 13.00%


Epoch [8/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:05<00:00, 10.85it/s, loss=2.32]


Epoch [8/10] - Train Loss: 2.3032, Train Acc: 8.80%, Val Loss: 2.2813, Val Acc: 14.00%


Epoch [9/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:05<00:00, 10.66it/s, loss=2.28]


Epoch [9/10] - Train Loss: 2.2992, Train Acc: 7.90%, Val Loss: 2.2763, Val Acc: 13.00%


Epoch [10/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:05<00:00, 10.78it/s, loss=2.25]


Epoch [10/10] - Train Loss: 2.3038, Train Acc: 7.90%, Val Loss: 2.2769, Val Acc: 15.00%
Best Validation Accuracy: 15.00%
Training with optimizer SGD and batch size 32


Epoch [1/10]: 100%|██████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.40it/s, loss=2.4]


Epoch [1/10] - Train Loss: 2.3174, Train Acc: 13.00%, Val Loss: 2.2987, Val Acc: 10.00%


Epoch [2/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.50it/s, loss=2.32]


Epoch [2/10] - Train Loss: 2.3137, Train Acc: 11.00%, Val Loss: 2.3006, Val Acc: 11.00%


Epoch [3/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.44it/s, loss=2.34]


Epoch [3/10] - Train Loss: 2.3126, Train Acc: 12.90%, Val Loss: 2.3028, Val Acc: 11.00%


Epoch [4/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.29it/s, loss=2.31]


Epoch [4/10] - Train Loss: 2.3095, Train Acc: 12.20%, Val Loss: 2.3038, Val Acc: 11.00%


Epoch [5/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.42it/s, loss=2.29]


Epoch [5/10] - Train Loss: 2.3120, Train Acc: 12.90%, Val Loss: 2.3010, Val Acc: 8.00%


Epoch [6/10]: 100%|██████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.54it/s, loss=2.3]


Epoch [6/10] - Train Loss: 2.3102, Train Acc: 12.80%, Val Loss: 2.3046, Val Acc: 11.00%


Epoch [7/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.43it/s, loss=2.33]


Epoch [7/10] - Train Loss: 2.3158, Train Acc: 11.30%, Val Loss: 2.3068, Val Acc: 8.00%
Early stopping triggered
Best Validation Accuracy: 11.00%
Training with optimizer Adam and batch size 8


Epoch [1/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.80it/s, loss=0.86]


Epoch [1/10] - Train Loss: 1.7551, Train Acc: 58.20%, Val Loss: 0.6005, Val Acc: 87.00%


Epoch [2/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.84it/s, loss=1.18]


Epoch [2/10] - Train Loss: 0.4643, Train Acc: 88.20%, Val Loss: 0.2557, Val Acc: 92.00%


Epoch [3/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.65it/s, loss=0.47]


Epoch [3/10] - Train Loss: 0.2344, Train Acc: 93.70%, Val Loss: 0.1207, Val Acc: 97.00%


Epoch [4/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.35it/s, loss=1.63]


Epoch [4/10] - Train Loss: 0.1714, Train Acc: 94.30%, Val Loss: 0.2787, Val Acc: 92.00%


Epoch [5/10]: 100%|████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.14it/s, loss=0.00373]


Epoch [5/10] - Train Loss: 0.1085, Train Acc: 96.90%, Val Loss: 0.1551, Val Acc: 95.00%


Epoch [6/10]: 100%|████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.09it/s, loss=0.00103]


Epoch [6/10] - Train Loss: 0.1048, Train Acc: 96.90%, Val Loss: 0.1851, Val Acc: 93.00%


Epoch [7/10]: 100%|█████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.02it/s, loss=0.0101]


Epoch [7/10] - Train Loss: 0.0359, Train Acc: 99.10%, Val Loss: 0.0808, Val Acc: 95.00%


Epoch [8/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.41it/s, loss=0.358]


Epoch [8/10] - Train Loss: 0.0319, Train Acc: 99.30%, Val Loss: 0.1447, Val Acc: 96.00%
Early stopping triggered
Best Validation Accuracy: 97.00%
Training with optimizer Adam and batch size 16


Epoch [1/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.15it/s, loss=1.79]


Epoch [1/10] - Train Loss: 2.0907, Train Acc: 47.20%, Val Loss: 1.8228, Val Acc: 81.00%


Epoch [2/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00,  9.93it/s, loss=0.766]


Epoch [2/10] - Train Loss: 1.3252, Train Acc: 85.20%, Val Loss: 0.8293, Val Acc: 91.00%


Epoch [3/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.01it/s, loss=0.903]


Epoch [3/10] - Train Loss: 0.5755, Train Acc: 91.60%, Val Loss: 0.3536, Val Acc: 94.00%


Epoch [4/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00,  9.91it/s, loss=0.567]


Epoch [4/10] - Train Loss: 0.2830, Train Acc: 94.90%, Val Loss: 0.2341, Val Acc: 97.00%


Epoch [5/10]: 100%|███████████████████████████████████████████████████████| 63/63 [00:06<00:00,  9.82it/s, loss=0.0415]


Epoch [5/10] - Train Loss: 0.1568, Train Acc: 97.60%, Val Loss: 0.1571, Val Acc: 96.00%


Epoch [6/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00,  9.79it/s, loss=0.248]


Epoch [6/10] - Train Loss: 0.1088, Train Acc: 98.20%, Val Loss: 0.1414, Val Acc: 97.00%


Epoch [7/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.10it/s, loss=0.139]


Epoch [7/10] - Train Loss: 0.1092, Train Acc: 98.10%, Val Loss: 0.1405, Val Acc: 97.00%


Epoch [8/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.07it/s, loss=0.454]


Epoch [8/10] - Train Loss: 0.0804, Train Acc: 98.90%, Val Loss: 0.1234, Val Acc: 97.00%


Epoch [9/10]: 100%|███████████████████████████████████████████████████████| 63/63 [00:06<00:00,  9.98it/s, loss=0.0925]


Epoch [9/10] - Train Loss: 0.0685, Train Acc: 99.30%, Val Loss: 0.1175, Val Acc: 99.00%


Epoch [10/10]: 100%|███████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.11it/s, loss=0.204]


Epoch [10/10] - Train Loss: 0.0636, Train Acc: 99.40%, Val Loss: 0.1289, Val Acc: 96.00%
Best Validation Accuracy: 99.00%
Training with optimizer Adam and batch size 32


Epoch [1/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.15it/s, loss=2.07]


Epoch [1/10] - Train Loss: 2.1865, Train Acc: 35.70%, Val Loss: 2.0282, Val Acc: 73.00%


Epoch [2/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.28it/s, loss=1.37]


Epoch [2/10] - Train Loss: 1.7419, Train Acc: 82.00%, Val Loss: 1.4759, Val Acc: 84.00%


Epoch [3/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.39it/s, loss=1.11]


Epoch [3/10] - Train Loss: 1.0969, Train Acc: 92.00%, Val Loss: 0.7296, Val Acc: 87.00%


Epoch [4/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.30it/s, loss=0.83]


Epoch [4/10] - Train Loss: 0.5675, Train Acc: 93.70%, Val Loss: 0.3601, Val Acc: 90.00%


Epoch [5/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.25it/s, loss=0.808]


Epoch [5/10] - Train Loss: 0.2932, Train Acc: 97.30%, Val Loss: 0.2464, Val Acc: 95.00%


Epoch [6/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.24it/s, loss=0.858]


Epoch [6/10] - Train Loss: 0.1887, Train Acc: 97.70%, Val Loss: 0.2507, Val Acc: 93.00%


Epoch [7/10]: 100%|███████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.21it/s, loss=0.0896]


Epoch [7/10] - Train Loss: 0.1532, Train Acc: 98.10%, Val Loss: 0.2381, Val Acc: 93.00%


Epoch [8/10]: 100%|██████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.15it/s, loss=0.5]


Epoch [8/10] - Train Loss: 0.1630, Train Acc: 98.30%, Val Loss: 0.2250, Val Acc: 94.00%


Epoch [9/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.18it/s, loss=0.425]


Epoch [9/10] - Train Loss: 0.1479, Train Acc: 98.90%, Val Loss: 0.2202, Val Acc: 95.00%


Epoch [10/10]: 100%|███████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.04it/s, loss=0.147]


Epoch [10/10] - Train Loss: 0.1265, Train Acc: 98.40%, Val Loss: 0.2218, Val Acc: 94.00%
Early stopping triggered
Best Validation Accuracy: 95.00%
Training with optimizer RMSprop and batch size 8


Epoch [1/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:06<00:00, 18.49it/s, loss=0.713]


Epoch [1/10] - Train Loss: 1.6598, Train Acc: 64.50%, Val Loss: 0.6220, Val Acc: 95.00%


Epoch [2/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:06<00:00, 18.00it/s, loss=0.919]


Epoch [2/10] - Train Loss: 0.5600, Train Acc: 87.20%, Val Loss: 0.2179, Val Acc: 94.00%


Epoch [3/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:06<00:00, 17.90it/s, loss=0.925]


Epoch [3/10] - Train Loss: 0.2697, Train Acc: 93.80%, Val Loss: 0.1199, Val Acc: 98.00%


Epoch [4/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:06<00:00, 17.91it/s, loss=0.246]


Epoch [4/10] - Train Loss: 0.1537, Train Acc: 95.70%, Val Loss: 0.1062, Val Acc: 97.00%


Epoch [5/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:06<00:00, 18.44it/s, loss=0.146]


Epoch [5/10] - Train Loss: 0.1043, Train Acc: 96.60%, Val Loss: 0.1295, Val Acc: 93.00%


Epoch [6/10]: 100%|████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.69it/s, loss=0.00724]


Epoch [6/10] - Train Loss: 0.1039, Train Acc: 97.50%, Val Loss: 0.1140, Val Acc: 94.00%


Epoch [7/10]: 100%|████████████████████████████████████████████████████| 125/125 [00:06<00:00, 17.92it/s, loss=0.00955]


Epoch [7/10] - Train Loss: 0.0681, Train Acc: 98.00%, Val Loss: 0.0759, Val Acc: 96.00%


Epoch [8/10]: 100%|████████████████████████████████████████████████████| 125/125 [00:06<00:00, 18.13it/s, loss=0.00194]


Epoch [8/10] - Train Loss: 0.0645, Train Acc: 97.90%, Val Loss: 0.1075, Val Acc: 97.00%
Early stopping triggered
Best Validation Accuracy: 98.00%
Training with optimizer RMSprop and batch size 16


Epoch [1/10]: 100%|██████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.31it/s, loss=1.4]


Epoch [1/10] - Train Loss: 1.8691, Train Acc: 57.80%, Val Loss: 1.2646, Val Acc: 88.00%


Epoch [2/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.16it/s, loss=0.72]


Epoch [2/10] - Train Loss: 0.7959, Train Acc: 91.10%, Val Loss: 0.4391, Val Acc: 90.00%


Epoch [3/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:05<00:00, 10.68it/s, loss=0.353]


Epoch [3/10] - Train Loss: 0.3310, Train Acc: 95.60%, Val Loss: 0.2506, Val Acc: 92.00%


Epoch [4/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.40it/s, loss=0.444]


Epoch [4/10] - Train Loss: 0.1752, Train Acc: 96.60%, Val Loss: 0.2206, Val Acc: 92.00%


Epoch [5/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.40it/s, loss=0.108]


Epoch [5/10] - Train Loss: 0.0840, Train Acc: 98.00%, Val Loss: 0.1629, Val Acc: 93.00%


Epoch [6/10]: 100%|███████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.42it/s, loss=0.0652]


Epoch [6/10] - Train Loss: 0.0693, Train Acc: 98.00%, Val Loss: 0.1234, Val Acc: 95.00%


Epoch [7/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.30it/s, loss=0.132]


Epoch [7/10] - Train Loss: 0.0312, Train Acc: 99.60%, Val Loss: 0.1152, Val Acc: 95.00%


Epoch [8/10]: 100%|███████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.43it/s, loss=0.0571]


Epoch [8/10] - Train Loss: 0.0285, Train Acc: 99.50%, Val Loss: 0.1204, Val Acc: 94.00%


Epoch [9/10]: 100%|███████████████████████████████████████████████████████| 63/63 [00:05<00:00, 10.50it/s, loss=0.0256]


Epoch [9/10] - Train Loss: 0.0418, Train Acc: 99.00%, Val Loss: 0.1057, Val Acc: 97.00%


Epoch [10/10]: 100%|██████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.50it/s, loss=0.0092]


Epoch [10/10] - Train Loss: 0.0230, Train Acc: 99.50%, Val Loss: 0.1148, Val Acc: 95.00%
Best Validation Accuracy: 97.00%
Training with optimizer RMSprop and batch size 32


Epoch [1/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.32it/s, loss=1.58]


Epoch [1/10] - Train Loss: 1.9491, Train Acc: 62.70%, Val Loss: 1.5241, Val Acc: 85.00%


Epoch [2/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.19it/s, loss=0.997]


Epoch [2/10] - Train Loss: 1.0529, Train Acc: 92.60%, Val Loss: 0.5691, Val Acc: 95.00%


Epoch [3/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.18it/s, loss=0.372]


Epoch [3/10] - Train Loss: 0.4577, Train Acc: 94.80%, Val Loss: 0.3035, Val Acc: 95.00%


Epoch [4/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.38it/s, loss=0.48]


Epoch [4/10] - Train Loss: 0.2250, Train Acc: 97.30%, Val Loss: 0.1975, Val Acc: 94.00%


Epoch [5/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.53it/s, loss=0.627]


Epoch [5/10] - Train Loss: 0.1310, Train Acc: 98.50%, Val Loss: 0.1522, Val Acc: 95.00%


Epoch [6/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.29it/s, loss=0.327]


Epoch [6/10] - Train Loss: 0.0746, Train Acc: 99.10%, Val Loss: 0.1460, Val Acc: 99.00%


Epoch [7/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.23it/s, loss=0.174]


Epoch [7/10] - Train Loss: 0.0572, Train Acc: 99.60%, Val Loss: 0.1354, Val Acc: 96.00%


Epoch [8/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.42it/s, loss=0.382]


Epoch [8/10] - Train Loss: 0.0510, Train Acc: 99.80%, Val Loss: 0.1283, Val Acc: 97.00%


Epoch [9/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.37it/s, loss=0.104]


Epoch [9/10] - Train Loss: 0.0358, Train Acc: 100.00%, Val Loss: 0.1225, Val Acc: 94.00%


Epoch [10/10]: 100%|███████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.45it/s, loss=0.206]


Epoch [10/10] - Train Loss: 0.0372, Train Acc: 99.90%, Val Loss: 0.1157, Val Acc: 97.00%
Best Validation Accuracy: 99.00%
Training with optimizer AdamW and batch size 8


Epoch [1/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.48it/s, loss=0.812]


Epoch [1/10] - Train Loss: 1.8758, Train Acc: 56.60%, Val Loss: 0.7942, Val Acc: 89.00%


Epoch [2/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.06it/s, loss=0.897]


Epoch [2/10] - Train Loss: 0.5343, Train Acc: 86.80%, Val Loss: 0.2800, Val Acc: 91.00%


Epoch [3/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.29it/s, loss=0.15]


Epoch [3/10] - Train Loss: 0.2549, Train Acc: 92.50%, Val Loss: 0.2650, Val Acc: 92.00%


Epoch [4/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:07<00:00, 16.93it/s, loss=0.121]


Epoch [4/10] - Train Loss: 0.1905, Train Acc: 94.50%, Val Loss: 0.1306, Val Acc: 95.00%


Epoch [5/10]: 100%|████████████████████████████████████████████████████| 125/125 [00:07<00:00, 16.73it/s, loss=0.00823]


Epoch [5/10] - Train Loss: 0.1806, Train Acc: 94.70%, Val Loss: 0.2999, Val Acc: 91.00%


Epoch [6/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:07<00:00, 16.77it/s, loss=0.018]


Epoch [6/10] - Train Loss: 0.1477, Train Acc: 96.10%, Val Loss: 0.2622, Val Acc: 94.00%


Epoch [7/10]: 100%|█████████████████████████████████████████████████████| 125/125 [00:07<00:00, 16.98it/s, loss=0.0007]


Epoch [7/10] - Train Loss: 0.0612, Train Acc: 98.20%, Val Loss: 0.2647, Val Acc: 95.00%


Epoch [8/10]: 100%|████████████████████████████████████████████████████| 125/125 [00:07<00:00, 16.84it/s, loss=9.94e-5]


Epoch [8/10] - Train Loss: 0.0515, Train Acc: 99.00%, Val Loss: 0.2382, Val Acc: 93.00%


Epoch [9/10]: 100%|█████████████████████████████████████████████████████| 125/125 [00:07<00:00, 16.98it/s, loss=0.0064]


Epoch [9/10] - Train Loss: 0.0596, Train Acc: 98.30%, Val Loss: 0.2902, Val Acc: 94.00%
Early stopping triggered
Best Validation Accuracy: 95.00%
Training with optimizer AdamW and batch size 16


Epoch [1/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:06<00:00,  9.97it/s, loss=2.06]


Epoch [1/10] - Train Loss: 2.1326, Train Acc: 36.90%, Val Loss: 1.8767, Val Acc: 78.00%


Epoch [2/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:06<00:00,  9.76it/s, loss=1.32]


Epoch [2/10] - Train Loss: 1.4074, Train Acc: 85.20%, Val Loss: 0.8537, Val Acc: 90.00%


Epoch [3/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00,  9.97it/s, loss=0.631]


Epoch [3/10] - Train Loss: 0.6073, Train Acc: 93.80%, Val Loss: 0.3376, Val Acc: 92.00%


Epoch [4/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00,  9.75it/s, loss=0.298]


Epoch [4/10] - Train Loss: 0.2739, Train Acc: 96.20%, Val Loss: 0.2277, Val Acc: 95.00%


Epoch [5/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00,  9.65it/s, loss=0.254]


Epoch [5/10] - Train Loss: 0.1401, Train Acc: 97.90%, Val Loss: 0.1675, Val Acc: 98.00%


Epoch [6/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:06<00:00,  9.89it/s, loss=0.12]


Epoch [6/10] - Train Loss: 0.1060, Train Acc: 97.80%, Val Loss: 0.1611, Val Acc: 95.00%


Epoch [7/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.18it/s, loss=0.365]


Epoch [7/10] - Train Loss: 0.0793, Train Acc: 99.40%, Val Loss: 0.1617, Val Acc: 96.00%


Epoch [8/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.15it/s, loss=0.189]


Epoch [8/10] - Train Loss: 0.0877, Train Acc: 98.70%, Val Loss: 0.1557, Val Acc: 96.00%


Epoch [9/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.20it/s, loss=0.209]


Epoch [9/10] - Train Loss: 0.0626, Train Acc: 99.20%, Val Loss: 0.1263, Val Acc: 95.00%


Epoch [10/10]: 100%|███████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.18it/s, loss=0.218]


Epoch [10/10] - Train Loss: 0.0531, Train Acc: 99.30%, Val Loss: 0.1324, Val Acc: 95.00%
Early stopping triggered
Best Validation Accuracy: 98.00%
Training with optimizer AdamW and batch size 32


Epoch [1/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.27it/s, loss=2.03]


Epoch [1/10] - Train Loss: 2.1759, Train Acc: 33.40%, Val Loss: 1.9916, Val Acc: 64.00%


Epoch [2/10]: 100%|██████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.16it/s, loss=1.5]


Epoch [2/10] - Train Loss: 1.6925, Train Acc: 82.60%, Val Loss: 1.3306, Val Acc: 82.00%


Epoch [3/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.32it/s, loss=1.24]


Epoch [3/10] - Train Loss: 1.0428, Train Acc: 90.90%, Val Loss: 0.6385, Val Acc: 90.00%


Epoch [4/10]: 100%|██████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.30it/s, loss=1.1]


Epoch [4/10] - Train Loss: 0.5232, Train Acc: 96.00%, Val Loss: 0.3355, Val Acc: 95.00%


Epoch [5/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.37it/s, loss=0.295]


Epoch [5/10] - Train Loss: 0.2526, Train Acc: 97.10%, Val Loss: 0.2318, Val Acc: 95.00%


Epoch [6/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.29it/s, loss=0.171]


Epoch [6/10] - Train Loss: 0.1468, Train Acc: 99.00%, Val Loss: 0.2347, Val Acc: 96.00%


Epoch [7/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.36it/s, loss=0.312]


Epoch [7/10] - Train Loss: 0.1531, Train Acc: 99.30%, Val Loss: 0.2258, Val Acc: 96.00%


Epoch [8/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.30it/s, loss=0.614]


Epoch [8/10] - Train Loss: 0.1405, Train Acc: 99.20%, Val Loss: 0.2072, Val Acc: 94.00%


Epoch [9/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.35it/s, loss=0.295]


Epoch [9/10] - Train Loss: 0.1350, Train Acc: 98.80%, Val Loss: 0.2183, Val Acc: 96.00%


Epoch [10/10]: 100%|███████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.36it/s, loss=0.107]


Epoch [10/10] - Train Loss: 0.1154, Train Acc: 98.90%, Val Loss: 0.2117, Val Acc: 96.00%
Best Validation Accuracy: 96.00%
Training with optimizer Adamax and batch size 8


Epoch [1/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.19it/s, loss=2.03]


Epoch [1/10] - Train Loss: 2.1298, Train Acc: 41.30%, Val Loss: 1.7910, Val Acc: 74.00%


Epoch [2/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.12it/s, loss=1.09]


Epoch [2/10] - Train Loss: 1.3016, Train Acc: 84.10%, Val Loss: 0.8213, Val Acc: 91.00%


Epoch [3/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.72it/s, loss=0.334]


Epoch [3/10] - Train Loss: 0.6594, Train Acc: 88.40%, Val Loss: 0.4373, Val Acc: 93.00%


Epoch [4/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.76it/s, loss=0.351]


Epoch [4/10] - Train Loss: 0.3966, Train Acc: 92.00%, Val Loss: 0.2606, Val Acc: 97.00%


Epoch [5/10]: 100%|█████████████████████████████████████████████████████| 125/125 [00:06<00:00, 17.86it/s, loss=0.0357]


Epoch [5/10] - Train Loss: 0.2877, Train Acc: 94.00%, Val Loss: 0.1901, Val Acc: 97.00%


Epoch [6/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.84it/s, loss=0.165]


Epoch [6/10] - Train Loss: 0.1983, Train Acc: 96.30%, Val Loss: 0.1878, Val Acc: 97.00%


Epoch [7/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:06<00:00, 18.04it/s, loss=0.13]


Epoch [7/10] - Train Loss: 0.2060, Train Acc: 95.80%, Val Loss: 0.1963, Val Acc: 96.00%


Epoch [8/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.54it/s, loss=0.285]


Epoch [8/10] - Train Loss: 0.1838, Train Acc: 96.80%, Val Loss: 0.2079, Val Acc: 94.00%


Epoch [9/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.07it/s, loss=0.115]


Epoch [9/10] - Train Loss: 0.1939, Train Acc: 96.50%, Val Loss: 0.1826, Val Acc: 97.00%
Early stopping triggered
Best Validation Accuracy: 97.00%
Training with optimizer Adamax and batch size 16


Epoch [1/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.07it/s, loss=2.04]


Epoch [1/10] - Train Loss: 2.1594, Train Acc: 33.70%, Val Loss: 2.0167, Val Acc: 64.00%


Epoch [2/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.15it/s, loss=1.65]


Epoch [2/10] - Train Loss: 1.7484, Train Acc: 80.10%, Val Loss: 1.5185, Val Acc: 83.00%


Epoch [3/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.20it/s, loss=1.14]


Epoch [3/10] - Train Loss: 1.1735, Train Acc: 89.50%, Val Loss: 0.8910, Val Acc: 89.00%


Epoch [4/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.08it/s, loss=0.543]


Epoch [4/10] - Train Loss: 0.6530, Train Acc: 93.10%, Val Loss: 0.4624, Val Acc: 92.00%


Epoch [5/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.32it/s, loss=0.314]


Epoch [5/10] - Train Loss: 0.3997, Train Acc: 93.90%, Val Loss: 0.3096, Val Acc: 94.00%


Epoch [6/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00,  9.93it/s, loss=0.231]


Epoch [6/10] - Train Loss: 0.2876, Train Acc: 96.10%, Val Loss: 0.3211, Val Acc: 95.00%


Epoch [7/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.11it/s, loss=1.01]


Epoch [7/10] - Train Loss: 0.2583, Train Acc: 96.30%, Val Loss: 0.3070, Val Acc: 95.00%


Epoch [8/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.01it/s, loss=0.38]


Epoch [8/10] - Train Loss: 0.2389, Train Acc: 97.70%, Val Loss: 0.2930, Val Acc: 95.00%


Epoch [9/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.34it/s, loss=0.338]


Epoch [9/10] - Train Loss: 0.2156, Train Acc: 97.40%, Val Loss: 0.2742, Val Acc: 96.00%


Epoch [10/10]: 100%|███████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.32it/s, loss=0.236]


Epoch [10/10] - Train Loss: 0.2291, Train Acc: 96.80%, Val Loss: 0.2737, Val Acc: 95.00%
Best Validation Accuracy: 96.00%
Training with optimizer Adamax and batch size 32


Epoch [1/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.35it/s, loss=2.17]


Epoch [1/10] - Train Loss: 2.1996, Train Acc: 29.30%, Val Loss: 2.1012, Val Acc: 61.00%


Epoch [2/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.34it/s, loss=1.88]


Epoch [2/10] - Train Loss: 1.9334, Train Acc: 71.70%, Val Loss: 1.8380, Val Acc: 75.00%


Epoch [3/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.28it/s, loss=1.57]


Epoch [3/10] - Train Loss: 1.5796, Train Acc: 83.70%, Val Loss: 1.4520, Val Acc: 80.00%


Epoch [4/10]: 100%|██████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.13it/s, loss=1.3]


Epoch [4/10] - Train Loss: 1.1563, Train Acc: 90.70%, Val Loss: 1.0202, Val Acc: 85.00%


Epoch [5/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.32it/s, loss=1.12]


Epoch [5/10] - Train Loss: 0.7883, Train Acc: 93.40%, Val Loss: 0.6625, Val Acc: 91.00%


Epoch [6/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.41it/s, loss=0.716]


Epoch [6/10] - Train Loss: 0.5874, Train Acc: 95.60%, Val Loss: 0.6744, Val Acc: 89.00%


Epoch [7/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.25it/s, loss=0.873]


Epoch [7/10] - Train Loss: 0.5834, Train Acc: 96.00%, Val Loss: 0.6384, Val Acc: 91.00%


Epoch [8/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.24it/s, loss=0.757]


Epoch [8/10] - Train Loss: 0.5600, Train Acc: 95.80%, Val Loss: 0.6311, Val Acc: 88.00%


Epoch [9/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.27it/s, loss=0.95]


Epoch [9/10] - Train Loss: 0.5417, Train Acc: 95.60%, Val Loss: 0.6190, Val Acc: 92.00%


Epoch [10/10]: 100%|███████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.31it/s, loss=0.609]


Epoch [10/10] - Train Loss: 0.5088, Train Acc: 95.80%, Val Loss: 0.5787, Val Acc: 91.00%
Best Validation Accuracy: 92.00%
Training with optimizer RAdam and batch size 8


Epoch [1/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:07<00:00, 16.91it/s, loss=2.33]


Epoch [1/10] - Train Loss: 2.3117, Train Acc: 12.60%, Val Loss: 2.2812, Val Acc: 18.00%


Epoch [2/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.10it/s, loss=2.14]


Epoch [2/10] - Train Loss: 2.1954, Train Acc: 33.00%, Val Loss: 2.0869, Val Acc: 53.00%


Epoch [3/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:07<00:00, 16.97it/s, loss=1.12]


Epoch [3/10] - Train Loss: 1.7252, Train Acc: 73.60%, Val Loss: 1.2625, Val Acc: 87.00%


Epoch [4/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.16it/s, loss=0.762]


Epoch [4/10] - Train Loss: 0.9046, Train Acc: 85.60%, Val Loss: 0.4850, Val Acc: 91.00%


Epoch [5/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:07<00:00, 16.97it/s, loss=0.588]


Epoch [5/10] - Train Loss: 0.4514, Train Acc: 90.50%, Val Loss: 0.2616, Val Acc: 94.00%


Epoch [6/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.01it/s, loss=0.253]


Epoch [6/10] - Train Loss: 0.2811, Train Acc: 93.60%, Val Loss: 0.2329, Val Acc: 96.00%


Epoch [7/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.03it/s, loss=0.273]


Epoch [7/10] - Train Loss: 0.2683, Train Acc: 94.10%, Val Loss: 0.2251, Val Acc: 93.00%


Epoch [8/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:07<00:00, 16.95it/s, loss=0.654]


Epoch [8/10] - Train Loss: 0.2552, Train Acc: 94.70%, Val Loss: 0.2102, Val Acc: 97.00%


Epoch [9/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.07it/s, loss=0.177]


Epoch [9/10] - Train Loss: 0.2327, Train Acc: 95.70%, Val Loss: 0.2050, Val Acc: 95.00%


Epoch [10/10]: 100%|█████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.30it/s, loss=0.235]


Epoch [10/10] - Train Loss: 0.2225, Train Acc: 96.00%, Val Loss: 0.2003, Val Acc: 95.00%
Best Validation Accuracy: 97.00%
Training with optimizer RAdam and batch size 16


Epoch [1/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:06<00:00,  9.91it/s, loss=2.35]


Epoch [1/10] - Train Loss: 2.3115, Train Acc: 12.80%, Val Loss: 2.2868, Val Acc: 18.00%


Epoch [2/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.07it/s, loss=2.22]


Epoch [2/10] - Train Loss: 2.2728, Train Acc: 17.20%, Val Loss: 2.2393, Val Acc: 25.00%


Epoch [3/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:06<00:00,  9.68it/s, loss=2.25]


Epoch [3/10] - Train Loss: 2.2083, Train Acc: 29.80%, Val Loss: 2.1778, Val Acc: 43.00%


Epoch [4/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:06<00:00,  9.76it/s, loss=2.08]


Epoch [4/10] - Train Loss: 2.1237, Train Acc: 48.60%, Val Loss: 2.0854, Val Acc: 54.00%


Epoch [5/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:06<00:00,  9.95it/s, loss=1.89]


Epoch [5/10] - Train Loss: 1.9789, Train Acc: 68.40%, Val Loss: 1.9336, Val Acc: 68.00%


Epoch [6/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.01it/s, loss=1.92]


Epoch [6/10] - Train Loss: 1.8700, Train Acc: 76.90%, Val Loss: 1.9235, Val Acc: 71.00%


Epoch [7/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.08it/s, loss=1.95]


Epoch [7/10] - Train Loss: 1.8446, Train Acc: 80.10%, Val Loss: 1.9001, Val Acc: 72.00%


Epoch [8/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.09it/s, loss=1.86]


Epoch [8/10] - Train Loss: 1.8040, Train Acc: 81.50%, Val Loss: 1.8637, Val Acc: 71.00%


Epoch [9/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.11it/s, loss=1.76]


Epoch [9/10] - Train Loss: 1.7505, Train Acc: 81.20%, Val Loss: 1.7810, Val Acc: 76.00%


Epoch [10/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.12it/s, loss=1.75]


Epoch [10/10] - Train Loss: 1.6643, Train Acc: 85.30%, Val Loss: 1.7151, Val Acc: 78.00%
Best Validation Accuracy: 78.00%
Training with optimizer RAdam and batch size 32


Epoch [1/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.23it/s, loss=2.32]


Epoch [1/10] - Train Loss: 2.3028, Train Acc: 10.60%, Val Loss: 2.3031, Val Acc: 6.00%


Epoch [2/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.25it/s, loss=2.34]


Epoch [2/10] - Train Loss: 2.2898, Train Acc: 11.80%, Val Loss: 2.2866, Val Acc: 9.00%


Epoch [3/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.23it/s, loss=2.23]


Epoch [3/10] - Train Loss: 2.2637, Train Acc: 16.70%, Val Loss: 2.2575, Val Acc: 12.00%


Epoch [4/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.24it/s, loss=2.23]


Epoch [4/10] - Train Loss: 2.2271, Train Acc: 23.10%, Val Loss: 2.2309, Val Acc: 19.00%


Epoch [5/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.28it/s, loss=2.19]


Epoch [5/10] - Train Loss: 2.1811, Train Acc: 32.90%, Val Loss: 2.1879, Val Acc: 32.00%


Epoch [6/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.25it/s, loss=2.18]


Epoch [6/10] - Train Loss: 2.1490, Train Acc: 40.90%, Val Loss: 2.1860, Val Acc: 32.00%


Epoch [7/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.32it/s, loss=2.15]


Epoch [7/10] - Train Loss: 2.1422, Train Acc: 42.00%, Val Loss: 2.1847, Val Acc: 35.00%


Epoch [8/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.25it/s, loss=2.17]


Epoch [8/10] - Train Loss: 2.1330, Train Acc: 45.90%, Val Loss: 2.1745, Val Acc: 39.00%


Epoch [9/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.31it/s, loss=2.04]


Epoch [9/10] - Train Loss: 2.1199, Train Acc: 47.30%, Val Loss: 2.1695, Val Acc: 39.00%


Epoch [10/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.26it/s, loss=2.16]


Epoch [10/10] - Train Loss: 2.1145, Train Acc: 48.40%, Val Loss: 2.1567, Val Acc: 41.00%
Best Validation Accuracy: 41.00%
Training with optimizer NAdam and batch size 8


Epoch [1/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.21it/s, loss=1.15]


Epoch [1/10] - Train Loss: 1.8767, Train Acc: 53.00%, Val Loss: 0.8776, Val Acc: 93.00%


Epoch [2/10]: 100%|███████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.23it/s, loss=0.44]


Epoch [2/10] - Train Loss: 0.6441, Train Acc: 85.00%, Val Loss: 0.2904, Val Acc: 94.00%


Epoch [3/10]: 100%|██████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.14it/s, loss=0.923]


Epoch [3/10] - Train Loss: 0.2828, Train Acc: 92.70%, Val Loss: 0.2094, Val Acc: 96.00%


Epoch [4/10]: 100%|█████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.07it/s, loss=0.0298]


Epoch [4/10] - Train Loss: 0.1859, Train Acc: 94.90%, Val Loss: 0.1982, Val Acc: 92.00%


Epoch [5/10]: 100%|█████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.00it/s, loss=0.0264]


Epoch [5/10] - Train Loss: 0.1461, Train Acc: 95.50%, Val Loss: 0.1724, Val Acc: 93.00%


Epoch [6/10]: 100%|█████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.24it/s, loss=0.0149]


Epoch [6/10] - Train Loss: 0.0633, Train Acc: 98.30%, Val Loss: 0.0747, Val Acc: 95.00%


Epoch [7/10]: 100%|█████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.21it/s, loss=0.0135]


Epoch [7/10] - Train Loss: 0.0791, Train Acc: 98.10%, Val Loss: 0.0879, Val Acc: 96.00%


Epoch [8/10]: 100%|█████████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.13it/s, loss=0.0307]


Epoch [8/10] - Train Loss: 0.0447, Train Acc: 98.80%, Val Loss: 0.0678, Val Acc: 99.00%


Epoch [9/10]: 100%|███████████████████████████████████████████████████| 125/125 [00:07<00:00, 17.00it/s, loss=0.000853]


Epoch [9/10] - Train Loss: 0.0477, Train Acc: 98.70%, Val Loss: 0.1091, Val Acc: 96.00%


Epoch [10/10]: 100%|███████████████████████████████████████████████████| 125/125 [00:07<00:00, 16.72it/s, loss=0.00734]


Epoch [10/10] - Train Loss: 0.0480, Train Acc: 98.70%, Val Loss: 0.1038, Val Acc: 96.00%
Best Validation Accuracy: 99.00%
Training with optimizer NAdam and batch size 16


Epoch [1/10]: 100%|█████████████████████████████████████████████████████████| 63/63 [00:06<00:00,  9.95it/s, loss=1.94]


Epoch [1/10] - Train Loss: 2.1096, Train Acc: 41.80%, Val Loss: 1.7799, Val Acc: 80.00%


Epoch [2/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00,  9.85it/s, loss=0.705]


Epoch [2/10] - Train Loss: 1.1615, Train Acc: 85.50%, Val Loss: 0.4523, Val Acc: 91.00%


Epoch [3/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00,  9.95it/s, loss=0.883]


Epoch [3/10] - Train Loss: 0.3757, Train Acc: 92.90%, Val Loss: 0.2331, Val Acc: 94.00%


Epoch [4/10]: 100%|███████████████████████████████████████████████████████| 63/63 [00:06<00:00,  9.87it/s, loss=0.0784]


Epoch [4/10] - Train Loss: 0.1437, Train Acc: 97.20%, Val Loss: 0.1352, Val Acc: 94.00%


Epoch [5/10]: 100%|███████████████████████████████████████████████████████| 63/63 [00:06<00:00,  9.98it/s, loss=0.0187]


Epoch [5/10] - Train Loss: 0.0657, Train Acc: 98.50%, Val Loss: 0.1291, Val Acc: 93.00%


Epoch [6/10]: 100%|███████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.01it/s, loss=0.0668]


Epoch [6/10] - Train Loss: 0.0410, Train Acc: 99.20%, Val Loss: 0.1380, Val Acc: 93.00%


Epoch [7/10]: 100%|████████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.23it/s, loss=0.483]


Epoch [7/10] - Train Loss: 0.0340, Train Acc: 99.50%, Val Loss: 0.1467, Val Acc: 93.00%


Epoch [8/10]: 100%|██████████████████████████████████████████████████████| 63/63 [00:06<00:00, 10.14it/s, loss=0.00842]


Epoch [8/10] - Train Loss: 0.0201, Train Acc: 99.80%, Val Loss: 0.1248, Val Acc: 94.00%
Early stopping triggered
Best Validation Accuracy: 94.00%
Training with optimizer NAdam and batch size 32


Epoch [1/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.39it/s, loss=2.13]


Epoch [1/10] - Train Loss: 2.1906, Train Acc: 32.80%, Val Loss: 2.0309, Val Acc: 64.00%


Epoch [2/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.27it/s, loss=1.93]


Epoch [2/10] - Train Loss: 1.7284, Train Acc: 83.60%, Val Loss: 1.3778, Val Acc: 84.00%


Epoch [3/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.31it/s, loss=0.901]


Epoch [3/10] - Train Loss: 1.0112, Train Acc: 92.40%, Val Loss: 0.6537, Val Acc: 94.00%


Epoch [4/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.40it/s, loss=0.308]


Epoch [4/10] - Train Loss: 0.4810, Train Acc: 96.00%, Val Loss: 0.3316, Val Acc: 95.00%


Epoch [5/10]: 100%|█████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.44it/s, loss=0.19]


Epoch [5/10] - Train Loss: 0.2312, Train Acc: 97.30%, Val Loss: 0.2316, Val Acc: 95.00%


Epoch [6/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.42it/s, loss=0.161]


Epoch [6/10] - Train Loss: 0.1294, Train Acc: 98.90%, Val Loss: 0.2258, Val Acc: 94.00%


Epoch [7/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.37it/s, loss=0.116]


Epoch [7/10] - Train Loss: 0.1321, Train Acc: 99.00%, Val Loss: 0.2261, Val Acc: 94.00%


Epoch [8/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  5.30it/s, loss=0.148]


Epoch [8/10] - Train Loss: 0.1195, Train Acc: 99.10%, Val Loss: 0.2158, Val Acc: 95.00%


Epoch [9/10]: 100%|████████████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.42it/s, loss=0.373]


Epoch [9/10] - Train Loss: 0.1092, Train Acc: 99.30%, Val Loss: 0.2041, Val Acc: 94.00%
Early stopping triggered
Best Validation Accuracy: 95.00%
