### Dataset Loading and Exploration
This section loads the CIFAR-100 dataset, which consists of 500 training and 100 testing images for each of 100 classes. The dataset is divided into:
- Sub-training set
- Validation set
- Test set

#### Key Points:
- "Fine" labels (100 classes) are used for training and evaluation.
- The dataset is normalized using standard mean and standard deviation values for CIFAR-100.


In [2]:
# Importing libraries to work with CIFAR-100 and PyTorch models

import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import random_split, DataLoader
from collections import defaultdict

# Setting up transformations for data augmentation and normalization
# I am using random cropping and flipping to improve generalization
# Also normalizing the dataset with CIFAR-100 specific mean and std

transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5071, 0.4867, 0.4408),
                         std=(0.2675, 0.2565, 0.2761))
])

# Define transformations for testing/validation
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5071, 0.4867, 0.4408),
                         std=(0.2675, 0.2565, 0.2761))
])

# Loading the CIFAR-100 dataset
# I downloaded the dataset and applied the transformations I set up earlier

train_dataset_full = torchvision.datasets.CIFAR100(
    root='./data', train=True, download=True, transform=transform_train)

test_dataset = torchvision.datasets.CIFAR100(
    root='./data', train=False, download=True, transform=transform_test)


# Splitting the training dataset into sub-training and validation sets
# I am using 80% of the training dataset for sub-training and 20% for validation (1/5 of the total dataset)

train_size = int(0.8 * len(train_dataset_full))  # 80% for sub-training
val_size = len(train_dataset_full) - train_size  # 20% for validation

# Using random_split to divide the dataset

sub_train_dataset, val_dataset = random_split(
    train_dataset_full, [train_size, val_size],
    generator=torch.Generator().manual_seed(42))

print(f"Sub-Training Set Size: {len(sub_train_dataset)}")
print(f"Validation Set Size: {len(val_dataset)}")


batch_size = 128  # I chose a batch size of 128 for balanced memory usage and speed

# Creating data loaders for training, validation, and testing

sub_train_loader = DataLoader(sub_train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

# Get class names
class_names = train_dataset_full.classes
print(f"Number of Classes: {len(class_names)}")
print(f"Class Names: {class_names}")

# Get one batch from the sub-training loader
data_iter = iter(sub_train_loader)
images, labels = next(data_iter)
print(f"Images shape: {images.shape}")  # Should be [batch_size, 3, 32, 32]
print(f"Labels shape: {labels.shape}")  # Should be [batch_size]


Files already downloaded and verified
Files already downloaded and verified
Sub-Training Set Size: 40000
Validation Set Size: 10000
Number of Classes: 100
Class Names: ['apple', 'aquarium_fish', 'baby', 'bear', 'beaver', 'bed', 'bee', 'beetle', 'bicycle', 'bottle', 'bowl', 'boy', 'bridge', 'bus', 'butterfly', 'camel', 'can', 'castle', 'caterpillar', 'cattle', 'chair', 'chimpanzee', 'clock', 'cloud', 'cockroach', 'couch', 'crab', 'crocodile', 'cup', 'dinosaur', 'dolphin', 'elephant', 'flatfish', 'forest', 'fox', 'girl', 'hamster', 'house', 'kangaroo', 'keyboard', 'lamp', 'lawn_mower', 'leopard', 'lion', 'lizard', 'lobster', 'man', 'maple_tree', 'motorcycle', 'mountain', 'mouse', 'mushroom', 'oak_tree', 'orange', 'orchid', 'otter', 'palm_tree', 'pear', 'pickup_truck', 'pine_tree', 'plain', 'plate', 'poppy', 'porcupine', 'possum', 'rabbit', 'raccoon', 'ray', 'road', 'rocket', 'rose', 'sea', 'seal', 'shark', 'shrew', 'skunk', 'skyscraper', 'snail', 'snake', 'spider', 'squirrel', 'streetcar

In [3]:
# Utilise Apple Silicon GPU since I am using macbook hardware which has a separate gpu than cuda
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("MPS backend is available. Using MPS device.")
else:
    device = torch.device("cpu")
    print("MPS backend is not available. Using CPU.")

MPS backend is available. Using MPS device.


In [4]:
import torch.nn as nn
import torchvision.models as models

class ResNet_CIFAR100(nn.Module):
    def __init__(self, num_classes=100):
        super(ResNet_CIFAR100, self).__init__()
        self.model = models.resnet18(pretrained=False)
        # Modify the first convolution layer
        self.model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        # Remove the maxpool layer
        self.model.maxpool = nn.Identity()
        # Modify the fully connected layer
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)
    
    def forward(self, x):
        return self.model(x)

model_resnet = ResNet_CIFAR100().to(device)
print(model_resnet)

ResNet_CIFAR100(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): Identity()
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
      



In [5]:
class VGG16_CIFAR100(nn.Module):
    def __init__(self, num_classes=100):
        super(VGG16_CIFAR100, self).__init__()
        self.model = models.vgg16(pretrained=False)
        # Modify the classifier
        self.model.classifier[6] = nn.Linear(in_features=4096, out_features=num_classes)
    
    def forward(self, x):
        return self.model(x)

# Instantiate the model
model_vgg16 = VGG16_CIFAR100().to(device)
print(model_vgg16)


VGG16_CIFAR100(
  (model): VGG(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
      (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace=True)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (6): ReLU(inplace=True)
      (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (8): ReLU(inplace=True)
      (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace=True)
      (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (13): ReLU(inplace=True)
      (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (15): ReLU(inplace=True)
     

In [6]:
class GoogLeNet_CIFAR100(nn.Module):
    def __init__(self, num_classes=100):
        super(GoogLeNet_CIFAR100, self).__init__()
        self.model = models.googlenet(pretrained=False, aux_logits=False)
        # Modify the first convolutional layer
        self.model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        # Remove the maxpool layer
        self.model.maxpool1 = nn.Identity()
        # Modify the fully connected layer
        self.model.fc = nn.Linear(in_features=1024, out_features=num_classes)
    
    def forward(self, x):
        return self.model(x)

# Instantiate the model
model_googlenet = GoogLeNet_CIFAR100().to(device)
print(model_googlenet)


GoogLeNet_CIFAR100(
  (model): GoogLeNet(
    (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (maxpool1): Identity()
    (conv2): BasicConv2d(
      (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv3): BasicConv2d(
      (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (inception3a): Inception(
      (branch1): BasicConv2d(
        (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (branch2): Sequential(
        (0): BasicConv2d(
          (conv): Conv2d(192, 96, ker



In [7]:
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F

def train_model(model, train_loader, criterion, optimizer, scheduler=None, num_epochs=5):
    model.train()
    for epoch in range(1, num_epochs + 1):
        running_loss = 0.0
        correct = 0
        total = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            # Statistics
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = 100 * correct / total
        print(f'Epoch [{epoch}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%')
        
        if scheduler:
            scheduler.step()

def evaluate_model(model, data_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'Validation/Test Accuracy: {accuracy:.2f}%')
    return accuracy


In [8]:
# I Defined loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer_resnet = optim.SGD(model_resnet.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)

# I Defined learning rate scheduler
scheduler_resnet = optim.lr_scheduler.MultiStepLR(optimizer_resnet, milestones=[50, 75], gamma=0.1)

# Trained the model
num_epochs = 10
print("Training ResNet-18")
train_model(model_resnet, sub_train_loader, criterion, optimizer_resnet, scheduler_resnet, num_epochs)

# Evaluated on validation set
accuracy_resnet = evaluate_model(model_resnet, val_loader)


Training ResNet-18
Epoch [1/10], Loss: 4.4437, Accuracy: 4.01%
Epoch [2/10], Loss: 4.0824, Accuracy: 8.86%
Epoch [3/10], Loss: 3.8337, Accuracy: 11.95%
Epoch [4/10], Loss: 3.6685, Accuracy: 14.04%
Epoch [5/10], Loss: 3.5537, Accuracy: 15.82%
Epoch [6/10], Loss: 3.4605, Accuracy: 17.25%
Epoch [7/10], Loss: 3.3687, Accuracy: 18.71%
Epoch [8/10], Loss: 3.2815, Accuracy: 20.51%
Epoch [9/10], Loss: 3.1861, Accuracy: 22.39%
Epoch [10/10], Loss: 3.0950, Accuracy: 23.88%
Validation/Test Accuracy: 22.76%


In [9]:
# I Defined loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer_vgg16 = optim.Adam(model_vgg16.parameters(), lr=0.001, weight_decay=5e-4)

# I Defined learning rate scheduler
scheduler_vgg16 = optim.lr_scheduler.StepLR(optimizer_vgg16, step_size=30, gamma=0.1)

# Trained the model
num_epochs = 10
print("Training VGG-16")
train_model(model_vgg16, sub_train_loader, criterion, optimizer_vgg16, scheduler_vgg16, num_epochs)

# Evaluated on validation set
accuracy_vgg16 = evaluate_model(model_vgg16, val_loader)


Training VGG-16
Epoch [1/10], Loss: 4.8676, Accuracy: 0.91%
Epoch [2/10], Loss: 4.6055, Accuracy: 0.91%
Epoch [3/10], Loss: 4.6056, Accuracy: 0.95%
Epoch [4/10], Loss: 4.6056, Accuracy: 0.99%
Epoch [5/10], Loss: 4.6058, Accuracy: 1.02%
Epoch [6/10], Loss: 4.6057, Accuracy: 0.92%
Epoch [7/10], Loss: 4.6057, Accuracy: 0.96%
Epoch [8/10], Loss: 4.6058, Accuracy: 0.93%
Epoch [9/10], Loss: 4.6058, Accuracy: 0.98%
Epoch [10/10], Loss: 4.6058, Accuracy: 0.98%
Validation/Test Accuracy: 0.97%


In [10]:
# Defined loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer_googlenet = optim.AdamW(model_googlenet.parameters(), lr=0.001, weight_decay=5e-4)

# Defined learning rate scheduler
scheduler_googlenet = optim.lr_scheduler.CosineAnnealingLR(optimizer_googlenet, T_max=100)

# Trained the model
num_epochs = 10
print("Training GoogLeNet")
train_model(model_googlenet, sub_train_loader, criterion, optimizer_googlenet, scheduler_googlenet, num_epochs)

# Evaluated on validation set
accuracy_googlenet = evaluate_model(model_googlenet, val_loader)


Training GoogLeNet
Epoch [1/10], Loss: 3.9754, Accuracy: 7.87%
Epoch [2/10], Loss: 3.4383, Accuracy: 15.20%
Epoch [3/10], Loss: 2.9452, Accuracy: 23.85%
Epoch [4/10], Loss: 2.5554, Accuracy: 31.74%
Epoch [5/10], Loss: 2.2677, Accuracy: 38.02%
Epoch [6/10], Loss: 2.0384, Accuracy: 43.21%
Epoch [7/10], Loss: 1.8646, Accuracy: 47.30%
Epoch [8/10], Loss: 1.7166, Accuracy: 50.92%
Epoch [9/10], Loss: 1.5846, Accuracy: 54.08%
Epoch [10/10], Loss: 1.4778, Accuracy: 57.26%
Validation/Test Accuracy: 49.20%


### Trying more CNN Networks


In [16]:
import torch
import torch.nn as nn
import torchvision.models as models

class DenseNet_CIFAR100(nn.Module):
    def __init__(self, num_classes=100):
        super(DenseNet_CIFAR100, self).__init__()
        # Load pre-defined DenseNet-121 model
        self.model = models.densenet121(pretrained=False)
        # Modify the first convolution layer
        self.model.features.conv0 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        # Remove the initial pooling layer to preserve spatial dimensions
        self.model.features.pool0 = nn.Identity()
        # Modify the classifier to output 100 classes
        self.model.classifier = nn.Linear(self.model.classifier.in_features, num_classes)
    
    def forward(self, x):
        return self.model(x)

# Instantiate the model
model_densenet = DenseNet_CIFAR100().to(device)
print(model_densenet)


DenseNet_CIFAR100(
  (model): DenseNet(
    (features): Sequential(
      (conv0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu0): ReLU(inplace=True)
      (pool0): Identity()
      (denseblock1): _DenseBlock(
        (denselayer1): _DenseLayer(
          (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu1): ReLU(inplace=True)
          (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu2): ReLU(inplace=True)
          (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        )
        (denselayer2): _DenseLayer(
          (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu

In [17]:

# Defined loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer_densenet = optim.AdamW(model_densenet.parameters(), lr=0.001, weight_decay=1e-4)

# Defined learning rate scheduler
scheduler_densenet = optim.lr_scheduler.StepLR(optimizer_densenet, step_size=30, gamma=0.1)

# Trained the DenseNet model
num_epochs = 10
print("Training DenseNet-121")
train_model(model_densenet, sub_train_loader, criterion, optimizer_densenet, scheduler_densenet, num_epochs)

# Evaluated on validation set
accuracy_densenet = evaluate_model(model_densenet, val_loader)


Training DenseNet-121
Epoch [1/10], Loss: 3.7049, Accuracy: 12.60%
Epoch [2/10], Loss: 2.8994, Accuracy: 26.29%
Epoch [3/10], Loss: 2.3789, Accuracy: 36.94%
Epoch [4/10], Loss: 1.9966, Accuracy: 45.09%
Epoch [5/10], Loss: 1.7300, Accuracy: 51.42%
Epoch [6/10], Loss: 1.5362, Accuracy: 56.16%
Epoch [7/10], Loss: 1.3637, Accuracy: 60.69%
Epoch [8/10], Loss: 1.2292, Accuracy: 63.91%
Epoch [9/10], Loss: 1.1032, Accuracy: 67.17%
Epoch [10/10], Loss: 0.9993, Accuracy: 70.14%
Validation/Test Accuracy: 60.60%


In [18]:
class ResNeXt_CIFAR100(nn.Module):
    def __init__(self, num_classes=100):
        super(ResNeXt_CIFAR100, self).__init__()
        # Loaded pre-defined ResNeXt-50 model
        self.model = models.resnext50_32x4d(pretrained=False)
        # Modified the first convolution layer
        self.model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        # Removed the initial maxpool layer to preserve spatial dimensions
        self.model.maxpool = nn.Identity()
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)
    
    def forward(self, x):
        return self.model(x)

# Instantiated the model
model_resnext = ResNeXt_CIFAR100().to(device)
print(model_resnext)


ResNeXt_CIFAR100(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): Identity()
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): Conv2d(64, 256, kernel_size=(1, 1),

In [19]:
# Defined loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer_resnext = optim.SGD(model_resnext.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)

# Defined learning rate scheduler
scheduler_resnext = optim.lr_scheduler.MultiStepLR(optimizer_resnext, milestones=[50, 75], gamma=0.1)

# Trained the ResNeXt model
num_epochs = 10
print("Training ResNeXt-50")
train_model(model_resnext, sub_train_loader, criterion, optimizer_resnext, scheduler_resnext, num_epochs)

# Evaluate on validation set
accuracy_resnext = evaluate_model(model_resnext, val_loader)


Training ResNeXt-50
Epoch [1/10], Loss: 4.5186, Accuracy: 2.35%
Epoch [2/10], Loss: 4.3523, Accuracy: 4.59%
Epoch [3/10], Loss: 4.0843, Accuracy: 7.62%
Epoch [4/10], Loss: 3.8986, Accuracy: 10.21%
Epoch [5/10], Loss: 3.7515, Accuracy: 12.29%
Epoch [6/10], Loss: 3.6293, Accuracy: 13.91%
Epoch [7/10], Loss: 3.5137, Accuracy: 16.04%
Epoch [8/10], Loss: 3.4112, Accuracy: 17.78%
Epoch [9/10], Loss: 3.3097, Accuracy: 19.89%
Epoch [10/10], Loss: 3.2084, Accuracy: 21.82%
Validation/Test Accuracy: 22.00%


# Evaluate all models on validation set



In [21]:
from torch.utils.data import ConcatDataset

# Trained the model
num_epochs = 10

criterion = nn.CrossEntropyLoss()

# Combined sub-training and validation datasets
full_train_dataset = ConcatDataset([sub_train_dataset, val_dataset])

# Created DataLoader for full training dataset
full_train_loader = DataLoader(full_train_dataset, batch_size=128, shuffle=True, num_workers=2)

# Retrained DenseNet-121
model_densenet_full = DenseNet_CIFAR100().to(device)
model_densenet_full.load_state_dict(model_densenet.state_dict())
optimizer_densenet_full = optim.AdamW(model_densenet_full.parameters(), lr=0.001, weight_decay=1e-4)
scheduler_densenet_full = optim.lr_scheduler.StepLR(optimizer_densenet_full, step_size=30, gamma=0.1)

print("Retraining DenseNet-121 on Full Training Set")
train_model(model_densenet_full, full_train_loader, criterion, optimizer_densenet_full, scheduler_densenet_full, num_epochs)

# Retrained GoogleNet
model_googlenet_full = GoogLeNet_CIFAR100().to(device)
model_googlenet_full.load_state_dict(model_googlenet.state_dict())
optimizer_googlenet_full = optim.SGD(model_googlenet_full.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)
scheduler_googlenet_full = optim.lr_scheduler.MultiStepLR(optimizer_googlenet_full, milestones=[50, 75], gamma=0.1)

print("Retraining GoogleNet on Full Training Set")
train_model(model_googlenet_full, full_train_loader, criterion, optimizer_googlenet_full, scheduler_googlenet_full, num_epochs)

# Retrained ResNet-18
model_resnet_full = ResNet_CIFAR100().to(device)
model_resnet_full.load_state_dict(model_resnet.state_dict())
optimizer_resnet_full = optim.SGD(model_resnet_full.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)
scheduler_resnet_full = optim.lr_scheduler.MultiStepLR(optimizer_resnet_full, milestones=[50, 75], gamma=0.1)

print("Retraining ResNet-18 on Full Training Set")
train_model(model_resnet_full, full_train_loader, criterion, optimizer_resnet_full, scheduler_resnet_full, num_epochs)


Retraining DenseNet-121 on Full Training Set
Epoch [1/10], Loss: 1.0235, Accuracy: 69.48%
Epoch [2/10], Loss: 0.8964, Accuracy: 73.01%
Epoch [3/10], Loss: 0.8029, Accuracy: 75.54%
Epoch [4/10], Loss: 0.7287, Accuracy: 77.45%
Epoch [5/10], Loss: 0.6537, Accuracy: 79.72%
Epoch [6/10], Loss: 0.5892, Accuracy: 81.49%
Epoch [7/10], Loss: 0.5308, Accuracy: 83.03%
Epoch [8/10], Loss: 0.4707, Accuracy: 84.86%
Epoch [9/10], Loss: 0.4315, Accuracy: 86.00%
Epoch [10/10], Loss: 0.3853, Accuracy: 87.41%
Retraining GoogleNet on Full Training Set
Epoch [1/10], Loss: 1.2896, Accuracy: 62.25%
Epoch [2/10], Loss: 1.2339, Accuracy: 64.29%
Epoch [3/10], Loss: 1.2036, Accuracy: 65.07%
Epoch [4/10], Loss: 1.1875, Accuracy: 65.60%
Epoch [5/10], Loss: 1.1770, Accuracy: 65.81%
Epoch [6/10], Loss: 1.1680, Accuracy: 66.01%
Epoch [7/10], Loss: 1.1611, Accuracy: 66.30%
Epoch [8/10], Loss: 1.1457, Accuracy: 66.58%
Epoch [9/10], Loss: 1.1396, Accuracy: 66.93%
Epoch [10/10], Loss: 1.1361, Accuracy: 66.75%
Retraining 

In [22]:

# Evaluated on test set
print("Testing DenseNet-121")
test_accuracy_densenet = evaluate_model(model_densenet_full, test_loader)

print("Testing GoogleNet")
test_accuracy_googlenet = evaluate_model(model_googlenet_full, test_loader)

print("Testing ResNet-18")
test_accuracy_resnet = evaluate_model(model_resnet_full, test_loader)

# Summary of test accuracies
test_accuracies = {
    "DenseNet-121": test_accuracy_densenet,
    "GoogleNet": test_accuracy_googlenet,
    "ResNet-18": test_accuracy_resnet
}

print("\nTest Accuracies of Top Three Models:")
for model_name, acc in test_accuracies.items():
    print(f"{model_name}: {acc:.2f}%")


Testing DenseNet-121
Validation/Test Accuracy: 66.78%
Testing GoogleNet
Validation/Test Accuracy: 61.20%
Testing ResNet-18
Validation/Test Accuracy: 37.77%

Test Accuracies of Top Three Models:
DenseNet-121: 66.78%
GoogleNet: 61.20%
ResNet-18: 37.77%
