# Read dataset and create data loaders

In [184]:
# Import torch and CIFAR dataset
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
import torch.optim as optim
import torch.nn.functional as F

# Import matplotlib and numpy for graphs
import matplotlib.pyplot as plt
import numpy as np


In [None]:
# Load the CIFAR-10 dataset, define the class labels and build the
# training / validation data loaders.
# Reference for loading dataset: https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
# Reference for augmentation: https://pytorch.org/vision/stable/transforms.html
batch_size = 64
print('Batch size:', batch_size)

# Per-channel (RGB) mean and std used by transforms.Normalize below.
# NOTE(review): these values match the commonly quoted ImageNet statistics,
# not statistics computed on CIFAR-10 -- confirm this is intended.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Training pipeline: geometric / colour augmentation on the PIL image,
# then tensor conversion, normalisation and random erasing on the tensor.
train_steps = [
    transforms.RandomCrop(32, padding=4),                       # random 32x32 crop after padding by 4
    transforms.RandomHorizontalFlip(),                          # random left-right flip
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomRotation(15),                              # rotate by up to 15 degrees
    transforms.RandomAffine(degrees=10, translate=(0.1, 0.1)),  # further rotation (<= 10 degrees) and translation
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
    transforms.RandomErasing(p=0.5, scale=(0.02, 0.3)),         # erase a random patch with probability 0.5
]
transform_train = transforms.Compose(train_steps)

# Validation pipeline: no augmentation, only tensor conversion + normalisation
eval_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
]
transform_test = transforms.Compose(eval_steps)

# Arguments shared by both dataset splits and by both loaders
dataset_args = dict(root='./data', download=True)
loader_args = dict(batch_size=batch_size, num_workers=2)

# Training split: augmented and reshuffled every epoch
trainset = torchvision.datasets.CIFAR10(train=True, transform=transform_train, **dataset_args)
trainloader = DataLoader(trainset, shuffle=True, **loader_args)

# Held-out split: fixed order, no augmentation
testset = torchvision.datasets.CIFAR10(train=False, transform=transform_test, **dataset_args)
testloader = DataLoader(testset, shuffle=False, **loader_args)

# Human-readable names for the ten class indices
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'lorry',
)

Batch size: 64
Files already downloaded and verified
Files already downloaded and verified


In [186]:
# # From the PyTorch's tutorial on image classification
# import matplotlib.pyplot as plt
# import numpy as np

# def imshow(img):
#     '''
#     Show an image
#     Input: image file to show
#     Output: image
#     '''
#     img = img / 2 + 0.5     # unnormalize
#     npimg = img.numpy()
#     plt.imshow(np.transpose(npimg, (1, 2, 0)))
#     plt.show()

# # Get random training images
# dataiter = iter(trainloader)
# images, labels = next(dataiter)

# # Show images
# imshow(torchvision.utils.make_grid(images))
# # Print labels
# print(' '.join(f'{classes[labels[j]]:5s}' for j in range(batch_size)))

# Main model
Divided as such:


*   **Stem**: takes the images as inputs, extracts features from them
*   **Backbone**: a sequence of blocks; each block combines *K* parallel convolutional branches, weighted by the output of an expert branch
*   **Classifier**: takes input from the last block
*   **Model**: wraps all together







## Stem
*   Takes images as inputs
*   Extracts a feature representation from them

In [187]:
class Stem(nn.Module):
  '''
  Convolutional stem: turns a batch of images into a feature map.

  Three Conv(3x3, stride 1, padding 1) -> BatchNorm -> ReLU stages, with a
  2x2 max-pool after the second and after the third stage. The convolutions
  keep the spatial size and each pool halves it.
  Reference: Week 09 Lab

  Args:
    input_channels: number of channels of the incoming images
    middle_channels: number of channels produced by the first two stages
    output_channels: number of channels produced by the last stage
  '''
  def __init__(self, input_channels, middle_channels, output_channels):
    super().__init__()

    def conv_stage(in_ch, out_ch):
      # One Conv -> BatchNorm -> ReLU stage that preserves height and width
      return [
        nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(out_ch),
        nn.ReLU(inplace=True),
      ]

    layers = []
    layers += conv_stage(input_channels, middle_channels)
    layers += conv_stage(middle_channels, middle_channels)
    layers.append(nn.MaxPool2d(2))  # halve height and width
    layers += conv_stage(middle_channels, output_channels)
    layers.append(nn.MaxPool2d(2))  # halve height and width again

    # Kept as a single nn.Sequential named `stem` so parameter names stay stem.<index>.*
    self.stem = nn.Sequential(*layers)

  def forward(self, x):
    '''Run the input batch through the stem and return the feature map.'''
    return self.stem(x)

## Block

In [188]:
class ExpertBranch(nn.Module):
  '''
  Expert branch predicting vector a with K elements from input tensor X.

  Pipeline: global average pool -> fc1 (channel reduction by r) -> ReLU ->
  fc2 (to k values) -> softmax, so every row of the result is non-negative
  and sums to 1.

  Args:
    input_channels: number of channels C of the input feature map
    k: number of elements in the predicted vector a
    r: reduction ratio; fc1 maps C -> max(1, C // r) features

  Input:  tensor of shape (N, C, H, W)
  Output: tensor of shape (N, k)
  '''
  def __init__(self, input_channels, k, r):
    super(ExpertBranch, self).__init__()
    # Width of the bottleneck. Clamped to at least 1: when r > input_channels
    # the integer division gives 0, which would create zero-width Linear
    # layers and make the output independent of the input.
    hidden_channels = max(1, input_channels // r)

    # Spatially pool x
    self.pool = nn.AdaptiveAvgPool2d(1)
    # Forward through fc1, reducing by r
    self.fc1 = nn.Linear(input_channels, hidden_channels)
    # Activation function ReLU
    self.relu = nn.ReLU()
    # Forward through fc2
    self.fc2 = nn.Linear(hidden_channels, k)

  def forward(self, x):
    # Spatially pool X: (N, C, H, W) -> (N, C, 1, 1) -> (N, C)
    x = self.pool(x)
    x = x.squeeze(-1).squeeze(-1)
    # Forward through fc1 (reduction by r), then the non-linear activation g
    x = self.relu(self.fc1(x))
    # Pass through fc2 to obtain one score per expert
    x = self.fc2(x)
    # Softmax over the k scores
    return F.softmax(x, dim=1)

In [189]:
class Block(nn.Module):
  '''
  Residual block that mixes k parallel convolutional paths.

  Every path is a stack of Conv(3x3, stride 1, padding 1) -> BatchNorm -> ReLU
  layers applied to the same input. The expert branch predicts a vector a with
  k elements per sample, and the path outputs are summed using a as weights.
  A skip connection (with a 1x1 conv + BatchNorm projection when the channel
  counts differ) is added before the final ReLU.

  Args:
    input_channels: channels of the input feature map
    output_channels: channels produced by every path (and by the block)
    k: number of parallel paths / length of the expert vector a
    r: reduction ratio passed to the expert branch
    num_convs_per_path: number of conv layers in each path (default 2)

  Raises:
    ValueError: if num_convs_per_path is smaller than 1

  Input:  (N, input_channels, H, W)
  Output: (N, output_channels, H, W)
  '''
  def __init__(self, input_channels, output_channels, k, r, num_convs_per_path=2):
    super(Block, self).__init__()
    if num_convs_per_path < 1:
        raise ValueError(f"num_convs_per_path must be >= 1, got {num_convs_per_path}")

    # Hyper-parameters shared by every conv in the paths (spatial size is preserved)
    kernel_size = 3
    stride = 1
    padding = 1

    # Set k and expert branch (generates vector a with K elements from X as a = E(X))
    self.k = k
    self.expertBranch = ExpertBranch(input_channels, k=k, r=r)

    # Create K convolutional paths
    self.conv_paths = nn.ModuleList()
    for _path in range(k):
        conv_stack = []
        # First conv maps input_channels -> output_channels, the rest keep output_channels
        in_channels = input_channels
        for _layer in range(num_convs_per_path):
            conv_stack.append(nn.Conv2d(in_channels, output_channels,
                                        kernel_size=kernel_size, stride=stride, padding=padding))
            conv_stack.append(nn.BatchNorm2d(output_channels))
            conv_stack.append(nn.ReLU(inplace=True))
            in_channels = output_channels
        self.conv_paths.append(nn.Sequential(*conv_stack))

    # 1x1 conv projection for the skip connection, only when channel counts differ
    self.use_shortcut = (input_channels != output_channels)
    if self.use_shortcut:
        self.shortcut = nn.Sequential(
            nn.Conv2d(input_channels, output_channels, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(output_channels)
        )

  def forward(self, x):
    identity = x
    # Vector a from expert branch: (N, k)
    a = self.expertBranch(x)

    # Apply every conv path to the same input and stack: (N, k, C_out, H, W)
    path_outputs = [path(x) for path in self.conv_paths]
    stacked = torch.stack(path_outputs, dim=1)

    # Reshape a to (N, k, 1, 1, 1) so it broadcasts over channels and space,
    # then take the weighted sum over the k paths
    a = a.view(a.size(0), self.k, 1, 1, 1)
    out = (a * stacked).sum(dim=1)

    # Skip connection, projected when the channel counts differ
    if self.use_shortcut:
        identity = self.shortcut(identity)
    out = out + identity

    # Activation after the skip connection
    return F.relu(out)

## Backbone

In [190]:
# class Backbone(nn.Module):
#   '''
#   N blocks
#   '''
#   def __init__(self, input_channels, hidden_channels, num_blocks, k, r):
#     super(Backbone, self).__init__()
#     self.blocks= nn.ModuleList()

#     # First block takes input from stem
#     self.blocks.append(Block(input_channels, hidden_channels, k=k, r=r))

#     # Rest of blocks take input form previous block
#     for _ in range(1, num_blocks):
#       self.blocks.append(Block(hidden_channels, hidden_channels, k=k, r=r))

#   def forward(self, x):
#     for idx, block in enumerate(self.blocks):
#       x = block(x)
#     return x

In [191]:
class Backbone(nn.Module):
    '''
    Stack of expert blocks with doubling channel width.

    Block i (0-based) outputs hidden_channels * 2**i channels; the first
    block consumes input_channels. A 2x2 max-pool (stride 2) sits between
    consecutive blocks, so there is no pooling after the last block.

    Args:
        input_channels: channels of the feature map fed to the first block
        hidden_channels: output channels of the first block
        num_blocks: number of expert blocks
        k: number of paths per block
        r: reduction ratio of each block's expert branch
    '''
    def __init__(self, input_channels, hidden_channels, num_blocks, k, r):
        super().__init__()

        layers = []
        in_ch = input_channels
        for stage in range(num_blocks):
            out_ch = hidden_channels * (2 ** stage)
            # Down-sample between blocks (i.e. before every block but the first)
            if stage > 0:
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            layers.append(Block(input_channels=in_ch, output_channels=out_ch, k=k, r=r))
            in_ch = out_ch

        self.backbone = nn.Sequential(*layers)

    def forward(self, x):
        '''Run x through all blocks and the pooling layers between them.'''
        out = self.backbone(x)
        return out

## Classifier

In [192]:
class Classifier(nn.Module):
  '''
  Classification head: global average pool followed by a linear or MLP head.

  Args:
    input_channels: channels C of the incoming feature map
    num_classes: number of output scores
    use_mlp: if true, use a three-layer MLP (C -> 2C -> C -> num_classes) with
      ReLU and dropout (p=0.25) after the first two layers; otherwise a single
      linear layer C -> num_classes

  Input:  (N, C, H, W)
  Output: (N, num_classes) raw scores (no softmax applied)
  '''
  def __init__(self, input_channels, num_classes, use_mlp):
    super().__init__()
    # Default parameters
    dropout_rate = 0.25

    # Spatial pooling down to 1x1
    self.pool = nn.AdaptiveAvgPool2d(1)
    self.use_mlp = use_mlp

    if not use_mlp:
      self.classifier = nn.Linear(input_channels, num_classes)
      return

    widened = input_channels * 2
    head_layers = [
      nn.Linear(input_channels, widened),
      nn.ReLU(),
      nn.Dropout(dropout_rate),
      nn.Linear(widened, input_channels),
      nn.ReLU(),
      nn.Dropout(dropout_rate),
      nn.Linear(input_channels, num_classes),
    ]
    self.classifier = nn.Sequential(*head_layers)

  def forward(self, x):
    # (N, C, H, W) -> (N, C, 1, 1) -> (N, C)
    pooled = self.pool(x)
    features = pooled.squeeze(-1).squeeze(-1)
    return self.classifier(features)


# Model

In [193]:
# class Model(nn.Module):
#   def __init__(self, input_channels, output_channels, middle_channels, hidden_channels, num_blocks, k, r, num_classes, use_mlp):
#     super(Model, self).__init__()
#     # Call stem
#     self.stem= Stem(
#       input_channels=input_channels,
#       middle_channels=middle_channels,
#       output_channels=output_channels
#     )
#     # Call backbone
#     self.backbone= Backbone(
#       input_channels=output_channels, 
#       hidden_channels= hidden_channels, 
#       num_blocks=num_blocks,
#       k=k, 
#       r=r)
#     # Call classifier
#     self.classifier= Classifier(
#       input_channels=hidden_channels, 
#       num_classes=num_classes,
#       use_mlp= use_mlp)

#   def forward(self,x):
#     x= self.stem(x)
#     x= self.backbone(x)
#     x= self.classifier(x)
#     return x

In [194]:
class Model(nn.Module):
    '''
    Full network: Stem -> Backbone -> Classifier.

    Args:
        input_channels: channels of the input images
        output_channels: channels produced by the stem (input to the backbone)
        middle_channels: channels of the stem's first two conv stages
        hidden_channels: output channels of the first backbone block
            (doubled by every following block)
        num_blocks: number of backbone blocks
        k: number of paths per backbone block
        r: reduction ratio of the expert branches
        num_classes: number of output scores
        use_mlp: whether the classifier uses the MLP head or a single linear layer
    '''
    def __init__(self, input_channels, output_channels, middle_channels, hidden_channels, num_blocks, k, r, num_classes, use_mlp):
        super(Model, self).__init__()

        # Stem: image -> feature map with output_channels channels
        self.stem = Stem(
            input_channels=input_channels,
            middle_channels=middle_channels,
            output_channels=output_channels
        )

        # Backbone of expert blocks
        self.backbone = Backbone(
            input_channels=output_channels,
            hidden_channels=hidden_channels,
            num_blocks=num_blocks,
            k=k,
            r=r
        )

        # Channels leaving the backbone: block i outputs hidden_channels * 2**i,
        # so the last of num_blocks blocks outputs hidden_channels * 2**(num_blocks-1).
        # Without any block the backbone is an empty nn.Sequential that passes the
        # stem output through; the formula alone would give a float (2**-1) there.
        if num_blocks >= 1:
            final_channels = hidden_channels * (2 ** (num_blocks - 1))
        else:
            final_channels = output_channels

        # Classifier on top of the last feature map
        self.classifier = Classifier(
            input_channels=final_channels,
            num_classes=num_classes,
            use_mlp=use_mlp
        )

    def forward(self, x):
        '''Return the class scores for a batch of images.'''
        x = self.stem(x)
        x = self.backbone(x)
        x = self.classifier(x)
        return x

# Create the loss and optimiser


In [195]:
# model = Model(
#     input_channels=3,
#     output_channels=128,
#     middle_channels=64,
#     hidden_channels=128,
#     num_blocks=7,
#     k=4,
#     r=8,
#     num_classes=10,
#     use_mlp=True
# )

# Hyper-parameters of the network, gathered in one place so they are easy to tune
model_config = dict(
    input_channels=3,
    output_channels=64,   # channels leaving the stem
    middle_channels=32,   # channels inside the stem
    hidden_channels=64,   # first backbone block; doubled by every following block
    num_blocks=4,         # number of backbone blocks
    k=4,                  # parallel paths per block
    r=8,                  # reduction ratio of the expert branch
    num_classes=10,
    use_mlp=True,
)
model = Model(**model_config)
# Weight Initialisation using He initialisation
def init_weights(m):
    '''
    Initialise one module in place; intended for use with `model.apply`.

    - nn.Conv2d: Kaiming-normal weights (fan_out, ReLU gain); bias untouched
    - nn.BatchNorm2d: weight set to 1, bias set to 0
    - nn.Linear: Kaiming-normal weights (default arguments); bias, if any, set to 0
    - any other module is left unchanged
    '''
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        return

    if isinstance(m, nn.BatchNorm2d):
        for param, value in ((m.weight, 1), (m.bias, 0)):
            nn.init.constant_(param, value)
        return

    if isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(m.weight)
        has_bias = m.bias is not None
        if has_bias:
            nn.init.constant_(m.bias, 0)

# Apply init_weights to every submodule of the model (in place)
model.apply(init_weights)

# Cross-entropy loss on the raw class scores
criterion = nn.CrossEntropyLoss()

# Adam with weight decay. Alternative kept for reference:
# optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay=1e-4, momentum=0.9, nesterov=True)
adam_settings = {"lr": 0.0001, "weight_decay": 1e-4}
optimizer = optim.Adam(model.parameters(), **adam_settings)

# Training & Testing

In [None]:
# Pick the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Move the model's parameters and buffers to the selected device
model.to(device)

# Per-epoch history of losses and accuracies (filled by the main loop)
train_losses = []
val_losses = []
train_accuracies, val_accuracies = [], []

# Training and Validation Loops 
def train(model, loader, criterion, optimizer, device):
    """
    Train the model for one pass over `loader`.

    Args:
        model: network to optimise (switched to training mode)
        loader: iterable yielding (inputs, labels) batches
        criterion: loss function called as criterion(outputs, labels); assumed to
            return the mean loss over the batch, as nn.CrossEntropyLoss() does
            by default
        optimizer: optimiser stepping the model's parameters
        device: device the batches are moved to

    Returns:
        (mean loss per sample, accuracy in percent) over all seen samples

    Raises:
        ValueError: if the loader yields no samples
    """
    model.train()
    loss_sum = 0.0   # sum of per-sample losses
    correct = 0
    total = 0

    for inputs, labels in tqdm(loader, desc="Training", leave=False):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch-mean loss by the batch size so that a smaller final
        # batch does not count as much as a full one in the epoch average
        num_samples = labels.size(0)
        loss_sum += loss.item() * num_samples
        total += num_samples

        _, predicted = outputs.max(1)
        correct += predicted.eq(labels).sum().item()

    if total == 0:
        raise ValueError("train() received a loader that yielded no samples")

    return loss_sum / total, 100 * correct / total

def evaluate(model, loader, criterion, device):
    """
    Evaluate the model on `loader` without updating it.

    Args:
        model: network to evaluate (switched to evaluation mode)
        loader: iterable yielding (inputs, labels) batches
        criterion: loss function called as criterion(outputs, labels); assumed to
            return the mean loss over the batch, as nn.CrossEntropyLoss() does
            by default
        device: device the batches are moved to

    Returns:
        (mean loss per sample, accuracy in percent) over all seen samples

    Raises:
        ValueError: if the loader yields no samples
    """
    model.eval()
    total = 0
    correct = 0
    loss_sum = 0.0   # sum of per-sample losses

    # Gradients are not needed for evaluation
    with torch.no_grad():
        for inputs, labels in tqdm(loader, desc="Validating", leave=False):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)

            # Weight the batch-mean loss by the batch size so that a smaller
            # final batch does not count as much as a full one in the average
            num_samples = labels.size(0)
            loss_sum += criterion(outputs, labels).item() * num_samples
            total += num_samples

            _, predicted = outputs.max(1)
            correct += predicted.eq(labels).sum().item()

    if total == 0:
        raise ValueError("evaluate() received a loader that yielded no samples")

    return loss_sum / total, 100 * correct / total

# Main loop: train, validate, keep the best checkpoint, stop early on a plateau
patience = 10            # epochs without improvement tolerated before stopping
early_stop_counter = 0   # consecutive epochs without improvement so far
epochs = 100
best_acc = 0.0

# NOTE(review): validation runs on `testloader`, so checkpoint selection and
# early stopping are tuned on the test split -- consider a separate validation split.
for epoch in range(epochs):
    print(f"\nEpoch {epoch+1}/{epochs}")
    train_loss, train_acc = train(model, trainloader, criterion, optimizer, device)
    val_loss, val_acc = evaluate(model, testloader, criterion, device)

    # Record this epoch's metrics in the history lists
    for history, value in (
        (train_losses, train_loss),
        (val_losses, val_loss),
        (train_accuracies, train_acc),
        (val_accuracies, val_acc),
    ):
        history.append(value)

    print(f"Train Loss: {train_loss:.4f} | Accuracy: {train_acc:.2f}%")
    print(f"Val   Loss: {val_loss:.4f} | Accuracy: {val_acc:.2f}%")

    # Checkpoint on a new best validation accuracy, otherwise count the stall
    improved = val_acc > best_acc
    if not improved:
        early_stop_counter += 1
        print(f"No improvement for {early_stop_counter} epochs.")
    else:
        best_acc = val_acc
        early_stop_counter = 0
        torch.save(model.state_dict(), "best_model.pth")
        print("Saved best model.")

    # Stop once the stall has lasted `patience` epochs
    if early_stop_counter >= patience:
        print(f"Early stopping triggered after {epoch+1} epochs.")
        break

print("\nTraining Complete")

# Print the metrics averaged over every epoch that was run
def _epoch_mean(values):
    # Arithmetic mean of one per-epoch history list
    return sum(values) / len(values)

avg_train_loss = _epoch_mean(train_losses)
avg_val_loss = _epoch_mean(val_losses)
avg_train_acc = _epoch_mean(train_accuracies)
avg_val_acc = _epoch_mean(val_accuracies)

print("\nFinal Averages Over All Epochs")
print(f"Average Train Loss: {avg_train_loss:.4f}")
print(f"Average Train Accuracy: {avg_train_acc:.2f}%")
print(f"Average Val   Loss: {avg_val_loss:.4f}")
print(f"Average Val   Accuracy: {avg_val_acc:.2f}%")


# Plot results
def _save_curve(train_values, val_values, train_label, val_label, title, ylabel, filename):
    # Draw the training and validation series against the epoch index on a
    # new figure and write it to `filename`
    fig, ax = plt.subplots()
    ax.plot(train_values, label=train_label)
    ax.plot(val_values, label=val_label)
    ax.set_title(title)
    ax.set_xlabel("Epoch")
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid()
    fig.savefig(filename)

# Loss curve
_save_curve(train_losses, val_losses,
            'Train Loss', 'Validation Loss',
            "Loss Curve", "Loss", "loss_curve.png")

# Accuracy curve
_save_curve(train_accuracies, val_accuracies,
            'Train Accuracy', 'Validation Accuracy',
            "Accuracy Curve", "Accuracy (%)", "accuracy_curve.png")

print("Plots saved: loss_curve.png and accuracy_curve.png")



Epoch 1/100


                                                             

Train Loss: 2.0680 | Accuracy: 25.62%
Val   Loss: 1.7176 | Accuracy: 36.38%
Saved best model.

Epoch 2/100


                                                             

Train Loss: 1.7670 | Accuracy: 34.73%
Val   Loss: 1.4926 | Accuracy: 44.72%
Saved best model.

Epoch 3/100


                                                              

Train Loss: 1.6528 | Accuracy: 39.32%
Val   Loss: 1.4038 | Accuracy: 48.16%
Saved best model.

Epoch 4/100


                                                              

Train Loss: 1.5656 | Accuracy: 43.02%
Val   Loss: 1.2948 | Accuracy: 52.94%
Saved best model.

Epoch 5/100


                                                              

Train Loss: 1.5009 | Accuracy: 45.91%
Val   Loss: 1.2383 | Accuracy: 55.20%
Saved best model.

Epoch 6/100


                                                              

Train Loss: 1.4506 | Accuracy: 47.50%
Val   Loss: 1.1867 | Accuracy: 56.84%
Saved best model.

Epoch 7/100


                                                             

Train Loss: 1.4013 | Accuracy: 49.49%
Val   Loss: 1.1479 | Accuracy: 58.03%
Saved best model.

Epoch 8/100


                                                             

Train Loss: 1.3634 | Accuracy: 50.63%
Val   Loss: 1.1191 | Accuracy: 59.89%
Saved best model.

Epoch 9/100


                                                              

Train Loss: 1.3263 | Accuracy: 52.53%
Val   Loss: 1.0407 | Accuracy: 62.02%
Saved best model.

Epoch 10/100


                                                              

Train Loss: 1.2907 | Accuracy: 53.69%
Val   Loss: 1.0445 | Accuracy: 61.87%
No improvement for 1 epochs.

Epoch 11/100


                                                              

Train Loss: 1.2625 | Accuracy: 54.74%
Val   Loss: 1.0011 | Accuracy: 64.56%
Saved best model.

Epoch 12/100


                                                              

Train Loss: 1.2367 | Accuracy: 56.12%
Val   Loss: 1.0000 | Accuracy: 63.86%
No improvement for 1 epochs.

Epoch 13/100


                                                              

Train Loss: 1.2103 | Accuracy: 57.13%
Val   Loss: 0.9520 | Accuracy: 66.17%
Saved best model.

Epoch 14/100


                                                              

Train Loss: 1.1847 | Accuracy: 57.76%
Val   Loss: 0.9209 | Accuracy: 66.98%
Saved best model.

Epoch 15/100


                                                              

Train Loss: 1.1592 | Accuracy: 58.74%
Val   Loss: 0.8959 | Accuracy: 67.94%
Saved best model.

Epoch 16/100


                                                             

Train Loss: 1.1389 | Accuracy: 59.58%
Val   Loss: 0.8880 | Accuracy: 68.77%
Saved best model.

Epoch 17/100


                                                              

Train Loss: 1.1183 | Accuracy: 60.10%
Val   Loss: 0.8469 | Accuracy: 69.66%
Saved best model.

Epoch 18/100


                                                              

Train Loss: 1.0939 | Accuracy: 61.20%
Val   Loss: 0.8462 | Accuracy: 69.85%
Saved best model.

Epoch 19/100


                                                             

Train Loss: 1.0847 | Accuracy: 61.52%
Val   Loss: 0.8234 | Accuracy: 70.10%
Saved best model.

Epoch 20/100


                                                              

Train Loss: 1.0565 | Accuracy: 62.59%
Val   Loss: 0.8101 | Accuracy: 71.01%
Saved best model.

Epoch 21/100


Validating:  61%|██████    | 95/157 [00:01<00:00, 92.22it/s]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

                                                             

Train Loss: 0.9289 | Accuracy: 66.98%
Val   Loss: 0.6817 | Accuracy: 75.79%
Saved best model.

Epoch 30/100


                                                              

Train Loss: 0.9183 | Accuracy: 67.57%
Val   Loss: 0.6910 | Accuracy: 75.59%
No improvement for 1 epochs.

Epoch 31/100


                                                             

Train Loss: 0.9057 | Accuracy: 67.99%
Val   Loss: 0.6951 | Accuracy: 76.04%
Saved best model.

Epoch 32/100


                                                              

Train Loss: 0.8998 | Accuracy: 68.36%
Val   Loss: 0.6626 | Accuracy: 76.87%
Saved best model.

Epoch 33/100


                                                             

Train Loss: 0.8920 | Accuracy: 68.50%
Val   Loss: 0.6524 | Accuracy: 77.25%
Saved best model.

Epoch 34/100


                                                              

Train Loss: 0.8796 | Accuracy: 69.20%
Val   Loss: 0.6393 | Accuracy: 77.72%
Saved best model.

Epoch 35/100


                                                              

Train Loss: 0.8731 | Accuracy: 69.27%
Val   Loss: 0.6355 | Accuracy: 77.86%
Saved best model.

Epoch 36/100


                                                              

Train Loss: 0.8571 | Accuracy: 69.98%
Val   Loss: 0.6132 | Accuracy: 78.82%
Saved best model.

Epoch 37/100


                                                              

Train Loss: 0.8523 | Accuracy: 69.87%
Val   Loss: 0.6146 | Accuracy: 78.69%
No improvement for 1 epochs.

Epoch 38/100


                                                              

Train Loss: 0.8427 | Accuracy: 70.34%
Val   Loss: 0.6262 | Accuracy: 78.40%
No improvement for 2 epochs.

Epoch 39/100


                                                              

Train Loss: 0.8349 | Accuracy: 70.83%
Val   Loss: 0.6072 | Accuracy: 79.01%
Saved best model.

Epoch 40/100


                                                              

Train Loss: 0.8226 | Accuracy: 71.06%
Val   Loss: 0.6149 | Accuracy: 78.51%
No improvement for 1 epochs.

Epoch 41/100


                                                              

Train Loss: 0.8162 | Accuracy: 71.39%
Val   Loss: 0.6037 | Accuracy: 79.09%
Saved best model.

Epoch 42/100


                                                              

Train Loss: 0.8077 | Accuracy: 71.61%
Val   Loss: 0.5940 | Accuracy: 79.32%
Saved best model.

Epoch 43/100


                                                             

Train Loss: 0.8049 | Accuracy: 71.55%
Val   Loss: 0.5916 | Accuracy: 79.45%
Saved best model.

Epoch 44/100


                                                              

Train Loss: 0.7978 | Accuracy: 72.09%
Val   Loss: 0.5850 | Accuracy: 79.67%
Saved best model.

Epoch 45/100


                                                             

Train Loss: 0.7932 | Accuracy: 71.97%
Val   Loss: 0.5823 | Accuracy: 79.63%
No improvement for 1 epochs.

Epoch 46/100


                                                              

Train Loss: 0.7849 | Accuracy: 72.45%
Val   Loss: 0.5653 | Accuracy: 80.56%
Saved best model.

Epoch 47/100


                                                             

Train Loss: 0.7789 | Accuracy: 72.50%
Val   Loss: 0.5663 | Accuracy: 80.30%
No improvement for 1 epochs.

Epoch 48/100


                                                              

Train Loss: 0.7691 | Accuracy: 72.77%
Val   Loss: 0.5737 | Accuracy: 80.43%
No improvement for 2 epochs.

Epoch 49/100


                                                             

Train Loss: 0.7637 | Accuracy: 73.04%
Val   Loss: 0.5653 | Accuracy: 80.58%
Saved best model.

Epoch 50/100


                                                              

Train Loss: 0.7567 | Accuracy: 73.31%
Val   Loss: 0.5639 | Accuracy: 80.86%
Saved best model.

Epoch 51/100


                                                              

Train Loss: 0.7529 | Accuracy: 73.31%
Val   Loss: 0.5772 | Accuracy: 79.45%
No improvement for 1 epochs.

Epoch 52/100


                                                              

Train Loss: 0.7495 | Accuracy: 73.53%
Val   Loss: 0.5564 | Accuracy: 81.13%
Saved best model.

Epoch 53/100


                                                              

Train Loss: 0.7402 | Accuracy: 73.99%
Val   Loss: 0.5602 | Accuracy: 80.97%
No improvement for 1 epochs.

Epoch 54/100


                                                             

Train Loss: 0.7299 | Accuracy: 74.37%
Val   Loss: 0.5578 | Accuracy: 80.82%
No improvement for 2 epochs.

Epoch 55/100


                                                             

Train Loss: 0.7252 | Accuracy: 74.54%
Val   Loss: 0.5381 | Accuracy: 81.67%
Saved best model.

Epoch 56/100


                                                              

Train Loss: 0.7228 | Accuracy: 74.53%
Val   Loss: 0.5385 | Accuracy: 82.11%
Saved best model.

Epoch 57/100


                                                             

Train Loss: 0.7199 | Accuracy: 74.87%
Val   Loss: 0.5412 | Accuracy: 81.36%
No improvement for 1 epochs.

Epoch 58/100


                                                             

Train Loss: 0.7086 | Accuracy: 75.25%
Val   Loss: 0.5325 | Accuracy: 82.34%
Saved best model.

Epoch 59/100


                                                              

Train Loss: 0.7066 | Accuracy: 75.13%
Val   Loss: 0.5254 | Accuracy: 82.32%
No improvement for 1 epochs.

Epoch 60/100


                                                              

Train Loss: 0.7000 | Accuracy: 75.27%
Val   Loss: 0.5236 | Accuracy: 82.24%
No improvement for 2 epochs.

Epoch 61/100


                                                              

Train Loss: 0.6981 | Accuracy: 75.41%
Val   Loss: 0.5303 | Accuracy: 82.11%
No improvement for 3 epochs.

Epoch 62/100


                                                              

Train Loss: 0.6854 | Accuracy: 75.97%
Val   Loss: 0.5069 | Accuracy: 83.03%
Saved best model.

Epoch 63/100


                                                              

Train Loss: 0.6839 | Accuracy: 75.84%
Val   Loss: 0.5258 | Accuracy: 82.44%
No improvement for 1 epochs.

Epoch 64/100


                                                              

Train Loss: 0.6727 | Accuracy: 76.19%
Val   Loss: 0.5257 | Accuracy: 81.98%
No improvement for 2 epochs.

Epoch 65/100


                                                              

Train Loss: 0.6731 | Accuracy: 76.19%
Val   Loss: 0.5081 | Accuracy: 82.68%
No improvement for 3 epochs.

Epoch 66/100


                                                              

Train Loss: 0.6679 | Accuracy: 76.50%
Val   Loss: 0.5149 | Accuracy: 82.41%
No improvement for 4 epochs.

Epoch 67/100


                                                              

Train Loss: 0.6678 | Accuracy: 76.44%
Val   Loss: 0.5050 | Accuracy: 83.05%
Saved best model.

Epoch 68/100


                                                             

Train Loss: 0.6606 | Accuracy: 76.96%
Val   Loss: 0.5219 | Accuracy: 82.25%
No improvement for 1 epochs.

Epoch 69/100


                                                              

Train Loss: 0.6510 | Accuracy: 76.94%
Val   Loss: 0.5117 | Accuracy: 83.04%
No improvement for 2 epochs.

Epoch 70/100


                                                             

Train Loss: 0.6549 | Accuracy: 76.90%
Val   Loss: 0.5026 | Accuracy: 83.32%
Saved best model.

Epoch 71/100


                                                              

Train Loss: 0.6435 | Accuracy: 77.32%
Val   Loss: 0.4812 | Accuracy: 83.64%
Saved best model.

Epoch 72/100


                                                             

Train Loss: 0.6431 | Accuracy: 77.30%
Val   Loss: 0.5073 | Accuracy: 83.17%
No improvement for 1 epochs.

Epoch 73/100


                                                              

Train Loss: 0.6415 | Accuracy: 77.48%
Val   Loss: 0.5007 | Accuracy: 83.20%
No improvement for 2 epochs.

Epoch 74/100


                                                              

Train Loss: 0.6357 | Accuracy: 77.75%
Val   Loss: 0.5035 | Accuracy: 83.20%
No improvement for 3 epochs.

Epoch 75/100


                                                             

Train Loss: 0.6277 | Accuracy: 77.93%
Val   Loss: 0.4988 | Accuracy: 83.66%
Saved best model.

Epoch 76/100


                                                             

Train Loss: 0.6290 | Accuracy: 77.85%
Val   Loss: 0.4844 | Accuracy: 83.55%
No improvement for 1 epochs.

Epoch 77/100


                                                             

Train Loss: 0.6285 | Accuracy: 77.94%
Val   Loss: 0.4971 | Accuracy: 83.36%
No improvement for 2 epochs.

Epoch 78/100


                                                              

Train Loss: 0.6197 | Accuracy: 77.93%
Val   Loss: 0.4996 | Accuracy: 83.49%
No improvement for 3 epochs.

Epoch 79/100


                                                              

Train Loss: 0.6213 | Accuracy: 78.35%
Val   Loss: 0.4766 | Accuracy: 84.53%
Saved best model.

Epoch 80/100


                                                             

Train Loss: 0.6102 | Accuracy: 78.59%
Val   Loss: 0.4786 | Accuracy: 84.21%
No improvement for 1 epochs.

Epoch 81/100


                                                             

Train Loss: 0.6060 | Accuracy: 78.57%
Val   Loss: 0.4856 | Accuracy: 84.15%
No improvement for 2 epochs.

Epoch 82/100


                                                              

Train Loss: 0.6004 | Accuracy: 78.95%
Val   Loss: 0.4836 | Accuracy: 84.00%
No improvement for 3 epochs.

Epoch 83/100


                                                              

Train Loss: 0.5991 | Accuracy: 78.84%
Val   Loss: 0.4900 | Accuracy: 83.99%
No improvement for 4 epochs.

Epoch 84/100


                                                             

Train Loss: 0.5945 | Accuracy: 79.08%
Val   Loss: 0.4639 | Accuracy: 84.89%
Saved best model.

Epoch 85/100


                                                              

Train Loss: 0.5940 | Accuracy: 79.06%
Val   Loss: 0.4795 | Accuracy: 84.09%
No improvement for 1 epochs.

Epoch 86/100


                                                              

Train Loss: 0.5883 | Accuracy: 79.25%
Val   Loss: 0.4795 | Accuracy: 84.60%
No improvement for 2 epochs.

Epoch 87/100


                                                              

Train Loss: 0.5809 | Accuracy: 79.35%
Val   Loss: 0.4727 | Accuracy: 84.55%
No improvement for 3 epochs.

Epoch 88/100


                                                              

Train Loss: 0.5796 | Accuracy: 79.63%
Val   Loss: 0.4894 | Accuracy: 84.00%
No improvement for 4 epochs.

Epoch 89/100


                                                              

Train Loss: 0.5777 | Accuracy: 79.68%
Val   Loss: 0.4699 | Accuracy: 84.68%
No improvement for 5 epochs.

Epoch 90/100


Training:   0%|          | 0/782 [00:00<?, ?it/s]