# Training vit_small_patch16_224 from scratch (no pre-trained weights) on the Food-101 dataset with the default attention block #
## Author - Thomas O'Sullivan ##

### This notebook reduces the model to have 10/12 attention blocks, a drop rate of 0.3, batch size of 32, weight decay of 1e-4, and is trained with a base learning rate of 1e-5 as it progresses through our LR schedule defined in cell 4. ###

### This cell imports libraries for deep learning, data handling, and visualization, and sets random seeds for reproducibility. ###

In [1]:
import time
import copy
import math
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import numpy as np
import random


torch.manual_seed(31)
random.seed(31)
np.random.seed(31)

### This cell defines image transformations for training and validation. Training data is augmented with cropping, flipping, and color jittering, while validation data is resized and center cropped. Both are converted to tensors and normalized using ImageNet statistics. ###

In [2]:
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])


### This cell loads the Food101 dataset with the defined transforms and creates data loaders for training and validation. It sets a batch size of 32 and enables shuffling for the training loader. It also prints the number of samples in each set. ###

In [3]:
train_dataset = torchvision.datasets.Food101(
    root='./data',
    split='train',
    transform=train_transform,
    download=True
)

val_dataset = torchvision.datasets.Food101(
    root='./data',
    split='test',
    transform=val_transform,
    download=True
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)  ####### Expiremental #######
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)  ####### Expiremental #######

print(f"Train samples: {len(train_dataset)}")
print(f"Val samples:   {len(val_dataset)}")


Train samples: 75750
Val samples:   25250


### This cell defines a custom Vision Transformer model using only the first 10 transformer blocks of vit_small_patch16_224. It disables pretraining, sets dropout rates, and customizes the forward pass. The model is then moved to GPU if available and printed. ###

In [4]:
from timm import create_model
import torch.nn as nn

class ViTLayerReduction(nn.Module):
    def __init__(self):
        super().__init__()
        full_model = create_model(
            "vit_small_patch16_224",
            pretrained=False,
            num_classes=101,  ####### Expiremental #######
            drop_rate=0.3,  ####### Expiremental #######
            drop_path_rate=0.1 ####### Expiremental #######
        )

        self.patch_embed = full_model.patch_embed
        self.cls_token = full_model.cls_token
        self.pos_embed = full_model.pos_embed
        self.pos_drop = full_model.pos_drop

        self.blocks = nn.Sequential(*list(full_model.blocks[:10]))  ####### Expiremental #######

        self.norm = full_model.norm
        self.head = full_model.head

    def forward(self, x):
        B = x.shape[0]
        x = self.patch_embed(x)
        cls_tokens = self.cls_token.expand(B, -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)
        x = x + self.pos_embed
        x = self.pos_drop(x)
        x = self.blocks(x)
        x = self.norm(x)
        return self.head(x[:, 0])


model = ViTLayerReduction()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print(device)
print(model)



cuda
ViTLayerReduction(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 384, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=384, out_features=1152, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (norm): Identity()
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=384, out_features=1536, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identity()
        (fc2): Lin

### This cell defines the learning rate schedule using warmup, linear ramp-up, and cosine decay phases. It also sets hyperparameters like learning rates, epoch counts, and weight decay. The compute_scheduled_lr function calculates the learning rate for a given epoch. ###

In [5]:
base_learning_rate = 1e-5 ####### Expiremental #######
peak_learning_rate = 1e-4 ####### Expiremental #######
final_lr_fraction = 0.10 ####### Expiremental #######

num_epochs = 60 ####### Expiremental #######
warmup_epochs = 15 ####### Expiremental #######
rampup_epochs = 15 ####### Expiremental #######
decay_epochs = num_epochs - (warmup_epochs + rampup_epochs)

weight_decay = 1e-4 ####### Expiremental #######

lr_history = []

####### Expiremental #######
def compute_scheduled_lr(epoch_step):
    """Flat Warmup -> Linear Rampup -> Cosine Decay."""
    if epoch_step < warmup_epochs:
        return base_learning_rate
    elif epoch_step < warmup_epochs + rampup_epochs:
        progress = (epoch_step - warmup_epochs) / rampup_epochs
        return base_learning_rate + progress * (peak_learning_rate - base_learning_rate)
    else:
        decay_progress = (epoch_step - warmup_epochs - rampup_epochs) / max(1, decay_epochs)
        cosine_decay = 0.5 * (1 + math.cos(math.pi * decay_progress))
        return final_lr_fraction * peak_learning_rate + (1 - final_lr_fraction) * peak_learning_rate * cosine_decay



### This cell trains the model over multiple epochs using the AdamW optimizer and label smoothed cross entropy loss. It logs training/validation loss, accuracy, and learning rate to TensorBoard, while applying the custom learning rate schedule. The model weights with the best validation accuracy are saved and restored at the end. ###

In [6]:
optimizer = optim.AdamW(model.parameters(), lr=base_learning_rate, weight_decay=weight_decay)
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('runs/food101_vit_experiment')

best_val_acc = 0.0
best_model_wts = None

for epoch in range(num_epochs):
    epoch_start = time.time()
    print(f"Epoch {epoch+1}/{num_epochs}")
    print("-" * 40)

    model.train()
    running_loss = 0.0
    running_corrects = 0

    for inputs, labels in tqdm(train_loader, desc="Training", leave=False):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        running_corrects += torch.sum(preds == labels.data)

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = running_corrects.double() / len(train_dataset)
    print(f"Train Loss: {epoch_loss:.4f}  Train Acc: {epoch_acc:.4f}")

    model.eval()
    val_running_loss = 0.0
    val_running_corrects = 0

    with torch.no_grad():
        for inputs, labels in tqdm(val_loader, desc="Validation", leave=False):
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            val_running_corrects += torch.sum(preds == labels.data)

    val_epoch_loss = val_running_loss / len(val_dataset)
    val_epoch_acc = val_running_corrects.double() / len(val_dataset)
    print(f"Val Loss: {val_epoch_loss:.4f}  Val Acc: {val_epoch_acc:.4f}")


    writer.add_scalar('Loss/Train', epoch_loss, epoch)
    writer.add_scalar('Accuracy/Train', epoch_acc.item(), epoch)
    writer.add_scalar('Loss/Validation', val_epoch_loss, epoch)
    writer.add_scalar('Accuracy/Validation', val_epoch_acc.item(), epoch)

    current_lr_epoch = epoch
    new_lr = compute_scheduled_lr(current_lr_epoch)
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr
    lr_history.append(new_lr)
    writer.add_scalar('Learning Rate', new_lr, epoch)

    if val_epoch_acc > best_val_acc:
        best_val_acc = val_epoch_acc
        best_model_wts = copy.deepcopy(model.state_dict())

    epoch_duration = time.time() - epoch_start
    print(f"Epoch {epoch+1} completed in {epoch_duration:.2f} seconds\n")

if best_model_wts is not None:
    model.load_state_dict(best_model_wts)
print(f"Best Validation Accuracy: {best_val_acc:.4f}")

writer.close()



Epoch 1/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 4.4684  Train Acc: 0.0379


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 4.2523  Val Acc: 0.0738
Epoch 1 completed in 276.24 seconds

Epoch 2/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 4.2592  Train Acc: 0.0725


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 4.0492  Val Acc: 0.1189
Epoch 2 completed in 248.57 seconds

Epoch 3/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 4.1184  Train Acc: 0.0999


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 3.9085  Val Acc: 0.1457
Epoch 3 completed in 240.58 seconds

Epoch 4/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 4.0139  Train Acc: 0.1186


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 3.8180  Val Acc: 0.1579
Epoch 4 completed in 237.91 seconds

Epoch 5/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 3.9249  Train Acc: 0.1398


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 3.7304  Val Acc: 0.1822
Epoch 5 completed in 239.59 seconds

Epoch 6/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 3.8425  Train Acc: 0.1531


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 3.6279  Val Acc: 0.2006
Epoch 6 completed in 238.20 seconds

Epoch 7/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 3.7670  Train Acc: 0.1703


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 3.5589  Val Acc: 0.2150
Epoch 7 completed in 237.29 seconds

Epoch 8/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 3.6961  Train Acc: 0.1839


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 3.4834  Val Acc: 0.2324
Epoch 8 completed in 237.28 seconds

Epoch 9/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 3.6277  Train Acc: 0.2006


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 3.4296  Val Acc: 0.2448
Epoch 9 completed in 239.87 seconds

Epoch 10/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 3.5681  Train Acc: 0.2125


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 3.3642  Val Acc: 0.2598
Epoch 10 completed in 240.29 seconds

Epoch 11/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 3.5079  Train Acc: 0.2261


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 3.3270  Val Acc: 0.2704
Epoch 11 completed in 240.88 seconds

Epoch 12/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 3.4516  Train Acc: 0.2391


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 3.2746  Val Acc: 0.2804
Epoch 12 completed in 240.57 seconds

Epoch 13/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 3.4016  Train Acc: 0.2500


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 3.2299  Val Acc: 0.2932
Epoch 13 completed in 239.73 seconds

Epoch 14/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 3.3462  Train Acc: 0.2636


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 3.1730  Val Acc: 0.3042
Epoch 14 completed in 240.26 seconds

Epoch 15/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 3.3024  Train Acc: 0.2745


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 3.1554  Val Acc: 0.3078
Epoch 15 completed in 238.72 seconds

Epoch 16/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 3.2497  Train Acc: 0.2864


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 3.1127  Val Acc: 0.3213
Epoch 16 completed in 238.85 seconds

Epoch 17/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 3.2093  Train Acc: 0.2966


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 3.0044  Val Acc: 0.3497
Epoch 17 completed in 240.10 seconds

Epoch 18/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 3.2325  Train Acc: 0.2916


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 3.0680  Val Acc: 0.3290
Epoch 18 completed in 244.29 seconds

Epoch 19/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 3.2452  Train Acc: 0.2858


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 3.0321  Val Acc: 0.3407
Epoch 19 completed in 246.59 seconds

Epoch 20/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 3.2396  Train Acc: 0.2873


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 3.0392  Val Acc: 0.3390
Epoch 20 completed in 248.14 seconds

Epoch 21/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 3.2102  Train Acc: 0.2950


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 3.0918  Val Acc: 0.3221
Epoch 21 completed in 245.98 seconds

Epoch 22/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 3.1768  Train Acc: 0.3053


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 3.0375  Val Acc: 0.3411
Epoch 22 completed in 247.01 seconds

Epoch 23/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 3.1365  Train Acc: 0.3152


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.9865  Val Acc: 0.3545
Epoch 23 completed in 245.38 seconds

Epoch 24/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 3.0825  Train Acc: 0.3282


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.8730  Val Acc: 0.3853
Epoch 24 completed in 237.53 seconds

Epoch 25/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 3.0223  Train Acc: 0.3437


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.8868  Val Acc: 0.3783
Epoch 25 completed in 239.32 seconds

Epoch 26/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 2.9610  Train Acc: 0.3579


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.8030  Val Acc: 0.4020
Epoch 26 completed in 241.91 seconds

Epoch 27/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 2.8932  Train Acc: 0.3761


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.7333  Val Acc: 0.4171
Epoch 27 completed in 239.83 seconds

Epoch 28/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 2.8290  Train Acc: 0.3917


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.6916  Val Acc: 0.4330
Epoch 28 completed in 239.17 seconds

Epoch 29/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 2.7631  Train Acc: 0.4091


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.6653  Val Acc: 0.4383
Epoch 29 completed in 236.05 seconds

Epoch 30/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 2.6846  Train Acc: 0.4303


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.5715  Val Acc: 0.4661
Epoch 30 completed in 236.76 seconds

Epoch 31/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 2.6125  Train Acc: 0.4483


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.6119  Val Acc: 0.4589
Epoch 31 completed in 236.94 seconds

Epoch 32/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 2.5432  Train Acc: 0.4679


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.5431  Val Acc: 0.4726
Epoch 32 completed in 238.55 seconds

Epoch 33/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 2.4371  Train Acc: 0.4963


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.4979  Val Acc: 0.4894
Epoch 33 completed in 239.08 seconds

Epoch 34/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 2.3304  Train Acc: 0.5243


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.5317  Val Acc: 0.4763
Epoch 34 completed in 238.34 seconds

Epoch 35/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 2.2087  Train Acc: 0.5610


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.4667  Val Acc: 0.5026
Epoch 35 completed in 237.32 seconds

Epoch 36/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 2.0889  Train Acc: 0.5957


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.4169  Val Acc: 0.5187
Epoch 36 completed in 239.52 seconds

Epoch 37/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 1.9628  Train Acc: 0.6332


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.4287  Val Acc: 0.5204
Epoch 37 completed in 238.30 seconds

Epoch 38/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 1.8357  Train Acc: 0.6756


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.5028  Val Acc: 0.5081
Epoch 38 completed in 238.18 seconds

Epoch 39/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 1.7152  Train Acc: 0.7137


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.4643  Val Acc: 0.5234
Epoch 39 completed in 237.08 seconds

Epoch 40/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 1.5893  Train Acc: 0.7559


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.4827  Val Acc: 0.5277
Epoch 40 completed in 238.16 seconds

Epoch 41/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 1.4783  Train Acc: 0.7982


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.5532  Val Acc: 0.5128
Epoch 41 completed in 238.43 seconds

Epoch 42/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 1.3811  Train Acc: 0.8317


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.5420  Val Acc: 0.5220
Epoch 42 completed in 241.55 seconds

Epoch 43/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 1.2955  Train Acc: 0.8660


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.5841  Val Acc: 0.5147
Epoch 43 completed in 241.85 seconds

Epoch 44/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 1.2264  Train Acc: 0.8895


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.5649  Val Acc: 0.5246
Epoch 44 completed in 236.58 seconds

Epoch 45/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 1.1665  Train Acc: 0.9130


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.5761  Val Acc: 0.5236
Epoch 45 completed in 237.81 seconds

Epoch 46/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 1.1148  Train Acc: 0.9317


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.5836  Val Acc: 0.5250
Epoch 46 completed in 238.85 seconds

Epoch 47/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 1.0764  Train Acc: 0.9451


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.5891  Val Acc: 0.5212
Epoch 47 completed in 239.00 seconds

Epoch 48/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 1.0419  Train Acc: 0.9544


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.5798  Val Acc: 0.5253
Epoch 48 completed in 241.22 seconds

Epoch 49/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 1.0094  Train Acc: 0.9645


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.5408  Val Acc: 0.5324
Epoch 49 completed in 240.95 seconds

Epoch 50/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 0.9817  Train Acc: 0.9728


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.5230  Val Acc: 0.5345
Epoch 50 completed in 238.32 seconds

Epoch 51/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 0.9636  Train Acc: 0.9765


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.5199  Val Acc: 0.5378
Epoch 51 completed in 237.45 seconds

Epoch 52/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 0.9418  Train Acc: 0.9817


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.5000  Val Acc: 0.5394
Epoch 52 completed in 238.95 seconds

Epoch 53/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 0.9240  Train Acc: 0.9857


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.4854  Val Acc: 0.5459
Epoch 53 completed in 237.94 seconds

Epoch 54/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 0.9083  Train Acc: 0.9885


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.4898  Val Acc: 0.5455
Epoch 54 completed in 237.53 seconds

Epoch 55/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 0.8971  Train Acc: 0.9899


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.4956  Val Acc: 0.5438
Epoch 55 completed in 236.31 seconds

Epoch 56/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 0.8855  Train Acc: 0.9919


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.4856  Val Acc: 0.5461
Epoch 56 completed in 237.67 seconds

Epoch 57/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 0.8761  Train Acc: 0.9933


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.4798  Val Acc: 0.5484
Epoch 57 completed in 236.07 seconds

Epoch 58/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 0.8685  Train Acc: 0.9941


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.4593  Val Acc: 0.5543
Epoch 58 completed in 238.75 seconds

Epoch 59/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 0.8634  Train Acc: 0.9949


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.4621  Val Acc: 0.5554
Epoch 59 completed in 241.20 seconds

Epoch 60/60
----------------------------------------


Training:   0%|          | 0/2368 [00:00<?, ?it/s]

Train Loss: 0.8587  Train Acc: 0.9948


Validation:   0%|          | 0/790 [00:00<?, ?it/s]

Val Loss: 2.4600  Val Acc: 0.5556
Epoch 60 completed in 238.80 seconds

Best Validation Accuracy: 0.5556
