# Read dataset and create data loaders

In [80]:
# Import torch and CIFAR dataset
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
import torch.optim as optim
import torch.nn.functional as F

# Import matplotlib and numpy for graphs
import matplotlib.pyplot as plt
import numpy as np


In [81]:
'''
Import CIFAR dataset, define labbels and load training and validation dataset
Reference for loading dataset: https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
Reference for augmentation: https://pytorch.org/vision/stable/transforms.html
'''
batch_size=64 
print('Batch size:', batch_size)

# Normalisation and std values for RGB in dataset
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Data augmentation for training set
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),  # Randomly crop the image with padding
    transforms.RandomHorizontalFlip(),    # Randomly flip the image horizontally
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Adjust brightness, contrast, etc.
    transforms.RandomRotation(15),        # Randomly rotate the image by up to 15 degrees
    transforms.RandomAffine(degrees=10, translate=(0.1, 0.1)),  # Randomly translate the image
    transforms.ToTensor(),                # Convert image to tensor
    transforms.Normalize(mean=mean, std=std),  # Normalize with mean and std
    transforms.RandomErasing(p=0.5, scale=(0.02, 0.3))  # Randomly erase a portion of the image (optional)
])

# No augmentation for test set (only normalization)
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)  # Normalize with mean and std
])

# Load training and testing datasets
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

# Define labels
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'lorry')

Batch size: 64
Files already downloaded and verified
Files already downloaded and verified


In [82]:
# # From the PyTorch's tutorial on image classification
# import matplotlib.pyplot as plt
# import numpy as np

# def imshow(img):
#     '''
#     Show an image
#     Input: image file to show
#     Output: image
#     '''
#     img = img / 2 + 0.5     # unnormalize
#     npimg = img.numpy()
#     plt.imshow(np.transpose(npimg, (1, 2, 0)))
#     plt.show()

# # Get random training images
# dataiter = iter(trainloader)
# images, labels = next(dataiter)

# # Show images
# imshow(torchvision.utils.make_grid(images))
# # Print labels
# print(' '.join(f'{classes[labels[j]]:5s}' for j in range(batch_size)))

# Main model
Divided as such:


*   **Stem**: takes the images as inputs, extracts features from them
*   **Backbone**: made up of *K* branches, made up of an expert branch
*   **Classifier**: takes input from the last block
*   **Model**: wraps all together







## Stem
*   Takes images as inputs
*   Extracts a feature representation from them

In [83]:
class Stem(nn.Module):
  '''
  Extract features using a Resnet-18 stem
  Reference: Week 09 Lab
  '''
  def __init__(self, input_channels, middle_channels, output_channels):
     super(Stem,self).__init__()
     # Default parameters
     kernel_size=3
     stride=1
     padding=1
     
     # Combine multiple layers
     self.stem = nn.Sequential(
       nn.Conv2d(input_channels, middle_channels, kernel_size = kernel_size, stride = stride, padding = padding),
       nn.BatchNorm2d(middle_channels), 
       nn.ReLU(inplace=True),
       nn.Conv2d(middle_channels, middle_channels,kernel_size = kernel_size, stride = stride, padding = padding),
       nn.BatchNorm2d(middle_channels),
       nn.ReLU(inplace=True),
       nn.MaxPool2d(2), # Half the size of the image
       nn.Conv2d(middle_channels, output_channels, kernel_size = kernel_size, stride = stride, padding = padding),
       nn.BatchNorm2d(output_channels),
       nn.ReLU(inplace=True),
       nn.MaxPool2d(2) # Half the size of the image
       )

  def forward(self,x):
    x = self.stem(x)
    return x

## Block

In [84]:
class ExpertBranch(nn.Module):
  '''
  Expert branch predicting vector a with K elements from input tensor X
  '''
  def __init__(self, input_channels, k, r):
    super(ExpertBranch,self).__init__()
    # Spatially pool x
    self.pool= nn.AdaptiveAvgPool2d(1)
    #Forward through fc1, reducing by r
    self.fc1= nn.Linear(input_channels, input_channels//r)
    # Activation function ReLu
    self.relu= nn.ReLU()
    # Forward through fc2
    self.fc2= nn.Linear(input_channels//r,k)

  def forward(self,x):
    # Spatially pool X
    x = self.pool(x)
    # Forward through fc1, reducing by r
    x= x.squeeze(-1).squeeze(-1)
    x = self.fc1(x)
    # Processed through non-linear activation g
    x = F.relu(x)
    # Pass through fc2
    x = self.fc2(x)
    # Forward with softmax
    x = F.softmax(x,dim=1)
    return x

In [85]:
class Block(nn.Module):
  '''
  Block
  '''
  def __init__(self, input_channels, output_channels, k, r):
    super(Block, self).__init__()
    # Default parameters
    kernel_size=3
    stride=1
    padding=1
    # Set parameters
    self.k= k
    self.expertBranch = ExpertBranch(input_channels, k=k, r=r)
    # Input from first block
    # Input from previous block for rest
    # Generate vector a with K elements from X as a= E(X)
    # Create K convolutional layers
    self.convs= nn.ModuleList([
        nn.Conv2d(input_channels, output_channels, kernel_size=kernel_size, stride= stride, padding=padding)
        for _ in range(k)
    ])

  def forward(self,x):
    identity= x
    # Vector a from expert branch
    a = self.expertBranch(x)
    # Convolutional layers 
    conv_outputs = [conv(x) for conv in self.convs]
    stacked = torch.stack(conv_outputs, dim=1)
    # Create vector O
    a= a.view(a.size(0), self.k, 1,1,1)

    out = (a* stacked).sum(dim=1)
    # Skip connection to stablise gradient descent
    out += identity
    out = F.relu(out) # activation after skip

    return out

## Backbone

In [86]:
class Backbone(nn.Module):
  '''
  N blocks
  '''
  def __init__(self, input_channels, hidden_channels, num_blocks, k, r):
    super(Backbone, self).__init__()
    self.blocks= nn.ModuleList()

    # First block takes input from stem
    self.blocks.append(Block(input_channels, hidden_channels, k=k, r=r))

    # Rest of blocks take input form previous block
    for _ in range(1, num_blocks):
      self.blocks.append(Block(hidden_channels, hidden_channels, k=k, r=r))

  def forward(self, x):
    for idx, block in enumerate(self.blocks):
      x = block(x)
    return x

## Classifier

In [87]:
class Classifier(nn.Module):
  def __init__(self, input_channels, num_classes, use_mlp):
    super(Classifier,self).__init__()
    # Default parameters
    dropout_rate=0.25
    # Spatially pool
    self.pool = nn.AdaptiveAvgPool2d(1)
    self.use_mlp= use_mlp

    if use_mlp:
      self.classifier= nn.Sequential(
          nn.Linear(input_channels, input_channels*2),
          nn.ReLU(),
          nn.Dropout(dropout_rate), # Deeper network with 3 layers
          nn.Linear(input_channels*2, input_channels),
          nn.ReLU(),
          nn.Dropout(dropout_rate),
          nn.Linear(input_channels, num_classes)
      )
    else:
      self.classifier= nn.Linear(input_channels, num_classes)

  def forward(self, x):
    x = self.pool(x).squeeze(-1).squeeze(-1)
    out = self.classifier(x)
    return out


# Model

In [88]:
class Model(nn.Module):
  def __init__(self, input_channels, output_channels, middle_channels, hidden_channels, num_blocks, k, r, num_classes, use_mlp):
    super(Model, self).__init__()
    # Call stem
    self.stem= Stem(
      input_channels=input_channels,
      middle_channels=middle_channels,
      output_channels=output_channels
    )
    # Call backbone
    self.backbone= Backbone(
      input_channels=output_channels, 
      hidden_channels= hidden_channels, 
      num_blocks=num_blocks,
      k=k, 
      r=r)
    # Call classifier
    self.classifier= Classifier(
      input_channels=hidden_channels, 
      num_classes=num_classes,
      use_mlp= use_mlp)

  def forward(self,x):
    x= self.stem(x)
    x= self.backbone(x)
    x= self.classifier(x)
    return x

# Create the loss and optmiser


In [None]:
model = Model(
    input_channels=3,
    output_channels=128,
    middle_channels=64,
    hidden_channels=128,
    num_blocks=7,
    k=4,
    r=8,
    num_classes=10,
    use_mlp=True
)

def init_weights(m):
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
    elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(m.weight)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)

model.apply(init_weights)

criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
# optimizer = optim.SGD(model.parameters(), lr=0.0001, weight_decay=1e-4, momentum=0.9)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

# Training & Testing

In [None]:
# Set up device 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Save model
model.to(device)

# Log training 
train_losses, val_losses = [], []
train_accuracies = []
val_accuracies = []

# Training and Validation Loops 
def train(model, loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in tqdm(loader, desc="Training", leave=False):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    return running_loss / len(loader), 100 * correct / total

def evaluate(model, loader, criterion, device):
    model.eval()
    total = 0
    correct = 0
    loss = 0.0

    with torch.no_grad():
        for inputs, labels in tqdm(loader, desc="Validating", leave=False):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss += criterion(outputs, labels).item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    return loss / len(loader), 100 * correct / total

# Main Loop 
# patience = 20  # Number of epochs to wait for improvement
early_stop_counter = 0 # Counter for early stopping
epochs = 200
best_acc = 0.0

for epoch in range(epochs):
    print(f"\nEpoch {epoch+1}/{epochs}")
    train_loss, train_acc = train(model, trainloader, criterion, optimizer, device)
    val_loss, val_acc = evaluate(model, testloader, criterion, device)


    # Log metrics
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accuracies.append(train_acc)
    val_accuracies.append(val_acc)

    print(f"Train Loss: {train_loss:.4f} | Accuracy: {train_acc:.2f}%")
    print(f"Val   Loss: {val_loss:.4f} | Accuracy: {val_acc:.2f}%")

    # Save best model
    if val_acc > best_acc:
        best_acc = val_acc
        early_stop_counter=0
        torch.save(model.state_dict(), "best_model.pth")
        print("Saved best model.")
    else:
        early_stop_counter += 1
        print(f"No improvement for {early_stop_counter} epochs.")

    # if early_stop_counter >= patience:
    #     print(f"Early stopping triggered after {epoch+1} epochs.")
    #     break
print("\nTraining Complete")

# Print Final Averages 
avg_train_loss = sum(train_losses) / len(train_losses)
avg_val_loss = sum(val_losses) / len(val_losses)
avg_train_acc = sum(train_accuracies) / len(train_accuracies)
avg_val_acc = sum(val_accuracies) / len(val_accuracies)

print("\nFinal Averages Over All Epochs")
print(f"Average Train Loss: {avg_train_loss:.4f}")
print(f"Average Train Accuracy: {avg_train_acc:.2f}%")
print(f"Average Val   Loss: {avg_val_loss:.4f}")
print(f"Average Val   Accuracy: {avg_val_acc:.2f}%")


# Plot results

# Plot Loss
plt.figure()
plt.plot(train_losses, label='Train Loss')
plt.plot(val_losses, label='Validation Loss')
plt.title("Loss Curve")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.grid()
plt.savefig("loss_curve.png")

# Plot Accuracy
plt.figure()
plt.plot(train_accuracies, label='Train Accuracy')
plt.plot(val_accuracies, label='Validation Accuracy')
plt.title("Accuracy Curve")
plt.xlabel("Epoch")
plt.ylabel("Accuracy (%)")
plt.legend()
plt.grid()
plt.savefig("accuracy_curve.png")

print("Plots saved: loss_curve.png and accuracy_curve.png")



Epoch 1/200


Training:   0%|          | 0/782 [00:00<?, ?it/s]

                                                              

Train Loss: 2.0017 | Accuracy: 28.12%
Val   Loss: 1.7449 | Accuracy: 40.96%
Saved best model.

Epoch 2/200


                                                              

Train Loss: 1.7377 | Accuracy: 42.11%
Val   Loss: 1.5354 | Accuracy: 51.11%
Saved best model.

Epoch 3/200


                                                              

Train Loss: 1.6148 | Accuracy: 48.98%
Val   Loss: 1.4600 | Accuracy: 54.80%
Saved best model.

Epoch 4/200


                                                              

Train Loss: 1.5292 | Accuracy: 53.17%
Val   Loss: 1.3493 | Accuracy: 61.07%
Saved best model.

Epoch 5/200


                                                              

Train Loss: 1.4608 | Accuracy: 56.67%
Val   Loss: 1.2894 | Accuracy: 63.98%
Saved best model.

Epoch 6/200


                                                              

Train Loss: 1.4019 | Accuracy: 59.36%
Val   Loss: 1.3127 | Accuracy: 62.80%
No improvement for 1 epochs.

Epoch 7/200


                                                              

Train Loss: 1.3470 | Accuracy: 62.16%
Val   Loss: 1.1703 | Accuracy: 70.41%
Saved best model.

Epoch 8/200


                                                              

Train Loss: 1.3068 | Accuracy: 64.32%
Val   Loss: 1.1409 | Accuracy: 71.59%
Saved best model.

Epoch 9/200


                                                              

Train Loss: 1.2698 | Accuracy: 66.11%
Val   Loss: 1.0816 | Accuracy: 74.57%
Saved best model.

Epoch 10/200


                                                              

Train Loss: 1.2410 | Accuracy: 67.58%
Val   Loss: 1.1352 | Accuracy: 72.78%
No improvement for 1 epochs.

Epoch 11/200


                                                              

Train Loss: 1.2205 | Accuracy: 68.64%
Val   Loss: 1.0614 | Accuracy: 75.62%
Saved best model.

Epoch 12/200


                                                              

Train Loss: 1.1987 | Accuracy: 69.18%
Val   Loss: 1.0628 | Accuracy: 75.43%
No improvement for 1 epochs.

Epoch 13/200


                                                              

Train Loss: 1.1828 | Accuracy: 70.14%
Val   Loss: 1.0264 | Accuracy: 76.91%
Saved best model.

Epoch 14/200


                                                              

Train Loss: 1.1704 | Accuracy: 70.95%
Val   Loss: 0.9647 | Accuracy: 79.95%
Saved best model.

Epoch 15/200


                                                              

Train Loss: 1.1548 | Accuracy: 71.58%
Val   Loss: 1.0037 | Accuracy: 77.99%
No improvement for 1 epochs.

Epoch 16/200


                                                              

Train Loss: 1.1398 | Accuracy: 72.39%
Val   Loss: 1.0050 | Accuracy: 78.16%
No improvement for 2 epochs.

Epoch 17/200


                                                              

Train Loss: 1.1275 | Accuracy: 72.93%
Val   Loss: 0.9559 | Accuracy: 80.31%
Saved best model.

Epoch 18/200


                                                              

Train Loss: 1.1236 | Accuracy: 73.04%
Val   Loss: 0.9967 | Accuracy: 78.52%
No improvement for 1 epochs.

Epoch 19/200


                                                              

Train Loss: 1.1154 | Accuracy: 73.63%
Val   Loss: 0.9677 | Accuracy: 79.81%
No improvement for 2 epochs.

Epoch 20/200


                                                              

Train Loss: 1.1067 | Accuracy: 73.80%
Val   Loss: 0.9542 | Accuracy: 80.09%
No improvement for 3 epochs.

Epoch 21/200


                                                              

Train Loss: 1.0979 | Accuracy: 74.20%
Val   Loss: 0.9701 | Accuracy: 79.52%
No improvement for 4 epochs.

Epoch 22/200


                                                              

Train Loss: 1.0874 | Accuracy: 74.55%
Val   Loss: 0.9057 | Accuracy: 82.30%
Saved best model.

Epoch 23/200


                                                              

Train Loss: 1.0845 | Accuracy: 74.75%
Val   Loss: 0.9400 | Accuracy: 81.12%
No improvement for 1 epochs.

Epoch 24/200


                                                              

Train Loss: 1.0748 | Accuracy: 75.44%
Val   Loss: 0.9812 | Accuracy: 79.34%
No improvement for 2 epochs.

Epoch 25/200


                                                              

Train Loss: 1.0702 | Accuracy: 75.35%
Val   Loss: 0.9265 | Accuracy: 81.61%
No improvement for 3 epochs.

Epoch 26/200


                                                              

Train Loss: 1.0657 | Accuracy: 75.69%
Val   Loss: 0.9143 | Accuracy: 81.93%
No improvement for 4 epochs.

Epoch 27/200


                                                              

Train Loss: 1.0595 | Accuracy: 75.94%
Val   Loss: 0.9342 | Accuracy: 81.06%
No improvement for 5 epochs.

Epoch 28/200


                                                              

Train Loss: 1.0535 | Accuracy: 76.13%
Val   Loss: 0.9097 | Accuracy: 82.38%
Saved best model.

Epoch 29/200


                                                              

Train Loss: 1.0529 | Accuracy: 76.27%
Val   Loss: 0.9030 | Accuracy: 82.64%
Saved best model.

Epoch 30/200


                                                              

Train Loss: 1.0510 | Accuracy: 76.25%
Val   Loss: 0.8889 | Accuracy: 83.59%
Saved best model.

Epoch 31/200


                                                              

Train Loss: 1.0417 | Accuracy: 76.67%
Val   Loss: 0.8985 | Accuracy: 83.10%
No improvement for 1 epochs.

Epoch 32/200


                                                              

Train Loss: 1.0406 | Accuracy: 76.80%
Val   Loss: 0.8818 | Accuracy: 83.08%
No improvement for 2 epochs.

Epoch 33/200


                                                              

Train Loss: 1.0338 | Accuracy: 77.26%
Val   Loss: 0.8805 | Accuracy: 84.07%
Saved best model.

Epoch 34/200


                                                              

Train Loss: 1.0333 | Accuracy: 77.06%
Val   Loss: 0.9051 | Accuracy: 82.69%
No improvement for 1 epochs.

Epoch 35/200


                                                              

Train Loss: 1.0278 | Accuracy: 77.22%
Val   Loss: 0.9174 | Accuracy: 82.10%
No improvement for 2 epochs.

Epoch 36/200


                                                              

Train Loss: 1.0253 | Accuracy: 77.33%
Val   Loss: 0.9034 | Accuracy: 82.68%
No improvement for 3 epochs.

Epoch 37/200


                                                              

Train Loss: 1.0240 | Accuracy: 77.44%
Val   Loss: 0.8819 | Accuracy: 83.58%
No improvement for 4 epochs.

Epoch 38/200


                                                              

Train Loss: 1.0167 | Accuracy: 77.92%
Val   Loss: 0.8753 | Accuracy: 83.87%
No improvement for 5 epochs.

Epoch 39/200


                                                              

Train Loss: 1.0133 | Accuracy: 78.01%
Val   Loss: 0.8985 | Accuracy: 82.65%
No improvement for 6 epochs.

Epoch 40/200


                                                              

Train Loss: 1.0129 | Accuracy: 78.03%
Val   Loss: 0.8452 | Accuracy: 85.22%
Saved best model.

Epoch 41/200


                                                              

Train Loss: 1.0035 | Accuracy: 78.40%
Val   Loss: 0.8675 | Accuracy: 83.81%
No improvement for 1 epochs.

Epoch 42/200


                                                              

Train Loss: 1.0119 | Accuracy: 78.15%
Val   Loss: 0.8630 | Accuracy: 84.29%
No improvement for 2 epochs.

Epoch 43/200


                                                              

Train Loss: 1.0038 | Accuracy: 78.26%
Val   Loss: 0.8534 | Accuracy: 84.79%
No improvement for 3 epochs.

Epoch 44/200


                                                              

Train Loss: 1.0051 | Accuracy: 78.30%
Val   Loss: 0.8542 | Accuracy: 84.81%
No improvement for 4 epochs.

Epoch 45/200


                                                              

Train Loss: 1.0000 | Accuracy: 78.64%
Val   Loss: 0.8679 | Accuracy: 84.43%
No improvement for 5 epochs.

Epoch 46/200


                                                              

Train Loss: 1.0005 | Accuracy: 78.61%
Val   Loss: 0.8797 | Accuracy: 83.99%
No improvement for 6 epochs.

Epoch 47/200


                                                              

Train Loss: 0.9954 | Accuracy: 78.79%
Val   Loss: 0.8530 | Accuracy: 84.41%
No improvement for 7 epochs.

Epoch 48/200


                                                              

Train Loss: 0.9908 | Accuracy: 79.09%
Val   Loss: 0.8570 | Accuracy: 84.85%
No improvement for 8 epochs.

Epoch 49/200


                                                              

Train Loss: 0.9912 | Accuracy: 78.99%
Val   Loss: 0.8491 | Accuracy: 85.02%
No improvement for 9 epochs.

Epoch 50/200


                                                              

Train Loss: 0.9930 | Accuracy: 78.98%
Val   Loss: 0.8780 | Accuracy: 83.71%
No improvement for 10 epochs.

Epoch 51/200


                                                              

Train Loss: 0.9856 | Accuracy: 79.17%
Val   Loss: 0.8333 | Accuracy: 85.89%
Saved best model.

Epoch 52/200


                                                              

Train Loss: 0.9879 | Accuracy: 79.23%
Val   Loss: 0.8322 | Accuracy: 85.70%
No improvement for 1 epochs.

Epoch 53/200


                                                              

Train Loss: 0.9852 | Accuracy: 79.19%
Val   Loss: 0.8443 | Accuracy: 85.20%
No improvement for 2 epochs.

Epoch 54/200


                                                              

Train Loss: 0.9799 | Accuracy: 79.49%
Val   Loss: 0.8256 | Accuracy: 85.66%
No improvement for 3 epochs.

Epoch 55/200


                                                              

Train Loss: 0.9796 | Accuracy: 79.35%
Val   Loss: 0.8323 | Accuracy: 85.83%
No improvement for 4 epochs.

Epoch 56/200


                                                              

Train Loss: 0.9835 | Accuracy: 79.21%
Val   Loss: 0.8290 | Accuracy: 85.94%
Saved best model.

Epoch 57/200


                                                              

Train Loss: 0.9755 | Accuracy: 79.73%
Val   Loss: 0.8367 | Accuracy: 85.68%
No improvement for 1 epochs.

Epoch 58/200


                                                              

Train Loss: 0.9759 | Accuracy: 79.73%
Val   Loss: 0.8208 | Accuracy: 86.28%
Saved best model.

Epoch 59/200


                                                              

Train Loss: 0.9763 | Accuracy: 79.69%
Val   Loss: 0.8372 | Accuracy: 85.27%
No improvement for 1 epochs.

Epoch 60/200


                                                              

Train Loss: 0.9717 | Accuracy: 79.82%
Val   Loss: 0.8335 | Accuracy: 85.64%
No improvement for 2 epochs.

Epoch 61/200


Training:  64%|██████▍   | 502/782 [00:17<00:09, 28.53it/s]