<a href="https://colab.research.google.com/github/bhanup6663/COMP691_DL/blob/main/Untitled9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import random
import numpy as np
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Seed every random number generator this script draws from (Python's
# `random`, NumPy and PyTorch) with the same fixed value for reproducibility.
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)

# Define BasicBlock for ResNet
class BasicBlock(nn.Module):
    """Residual unit made of two 3x3 convolutions plus a shortcut connection.

    Main path: conv3x3 -> BatchNorm -> ReLU -> conv3x3 -> BatchNorm.  The
    result is added to the shortcut of the input and passed through a ReLU.
    """

    # Output channels are ``expansion * planes``; ResNet reads this attribute.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        """Create the layers of the unit.

        Args:
            in_planes: Channel count of the incoming feature map.
            planes: Channel count produced by both 3x3 convolutions.
            stride: Stride of the first convolution and of the projection
                shortcut (when one is needed).
        """
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # An empty Sequential acts as the identity; a 1x1 conv + BN projection
        # is only required when the spatial size or channel count changes.
        needs_projection = stride != 1 or in_planes != out_planes
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        hidden = torch.relu(self.bn1(self.conv1(x)))
        main_path = self.bn2(self.conv2(hidden))
        return torch.relu(main_path + self.shortcut(x))

# Define ResNet Architecture
class ResNet(nn.Module):
    """Three-stage residual network with a 3x3 stem and a linear classifier.

    The stem maps 3 input channels to 16; the three stages produce 16, 32 and
    64 * ``block.expansion``-scaled channels, with the second and third stage
    starting with a stride-2 block.  Features are globally average-pooled
    before the classifier, so the spatial size of the input is not fixed.
    """

    def __init__(self, block, num_blocks, num_classes=100):
        """Build the network.

        Args:
            block: Callable ``block(in_planes, planes, stride)`` returning an
                ``nn.Module``; must expose an ``expansion`` attribute.
            num_blocks: Sequence of three ints, the number of blocks per stage.
            num_classes: Size of the classifier output.
        """
        super().__init__()
        # Running channel count; _make_layer updates it as blocks are appended.
        self.in_planes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
        self.linear = nn.Linear(64 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        # A distinct loop name avoids shadowing the ``stride`` parameter.
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        out = torch.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        # Global average pooling.  For 32x32 inputs the final map is 8x8, so
        # this matches the previous fixed ``avg_pool2d(out, 8)``; other input
        # sizes no longer break the flatten -> linear shape contract.
        out = nn.functional.adaptive_avg_pool2d(out, 1)
        out = torch.flatten(out, 1)
        out = self.linear(out)
        return out

# ResNet model variants
def ResNet20(num_classes=100):
    """Return a ResNet of BasicBlocks with 3 blocks in each of its three stages."""
    blocks_per_stage = [3] * 3
    return ResNet(BasicBlock, blocks_per_stage, num_classes)

def ResNet32(num_classes=100):
    """Return a ResNet of BasicBlocks with 5 blocks in each of its three stages."""
    blocks_per_stage = [5] * 3
    return ResNet(BasicBlock, blocks_per_stage, num_classes)

def ResNet44(num_classes=100):
    """Return a ResNet of BasicBlocks with 7 blocks in each of its three stages."""
    blocks_per_stage = [7] * 3
    return ResNet(BasicBlock, blocks_per_stage, num_classes)

def ResNet56(num_classes=100):
    """Return a ResNet of BasicBlocks with 9 blocks in each of its three stages."""
    blocks_per_stage = [9] * 3
    return ResNet(BasicBlock, blocks_per_stage, num_classes)

def ResNet110(num_classes=100):
    """Return a ResNet of BasicBlocks with 18 blocks in each of its three stages."""
    blocks_per_stage = [18] * 3
    return ResNet(BasicBlock, blocks_per_stage, num_classes)

# Random Erasing for Data Augmentation
class RandomErasing:
    """Overwrite a random rectangle of a ``(C, H, W)`` tensor with per-channel fill values.

    With probability ``probability`` the transform tries up to 100 times to
    sample a rectangle whose area is a fraction in ``[sl, sh]`` of the image
    and whose aspect ratio lies in ``[r1, 1 / r1]``.  The first rectangle that
    fits strictly inside the image is filled with ``mean`` (one value per
    channel).  The input tensor is modified in place and also returned.
    """

    def __init__(self, probability=0.5, sl=0.02, sh=0.4, r1=0.3, mean=(0.4914, 0.4822, 0.4465)):
        """Store the sampling parameters.

        Args:
            probability: Chance that an image is erased at all.
            sl: Lower bound of the erased area, as a fraction of the image area.
            sh: Upper bound of the erased area, as a fraction of the image area.
            r1: Lower bound of the aspect ratio; the upper bound is ``1 / r1``.
            mean: Per-channel fill values.
        """
        self.probability = probability
        self.sl = sl
        self.sh = sh
        self.r1 = r1
        # Copy the sequence: a shared (default or caller-owned) list must not
        # be aliased, otherwise editing one instance's mean changes others.
        self.mean = list(mean)

    def __call__(self, img):
        """Return ``img`` with at most one rectangle replaced by the fill values."""
        if random.uniform(0, 1) > self.probability:
            return img

        channels, height, width = img.size()[0], img.size()[1], img.size()[2]
        area = height * width  # invariant across attempts

        for _ in range(100):
            target_area = random.uniform(self.sl, self.sh) * area
            aspect_ratio = random.uniform(self.r1, 1 / self.r1)
            h = int(round(np.sqrt(target_area * aspect_ratio)))
            w = int(round(np.sqrt(target_area / aspect_ratio)))
            if w < width and h < height:
                top = random.randint(0, height - h)
                left = random.randint(0, width - w)
                # Use only as many fill values as the image has channels so
                # single-channel images work with the 3-value default.
                fill = torch.tensor(self.mean[:channels]).view(-1, 1, 1)
                img[:, top:top + h, left:left + w] = fill
                return img
        # No sampled rectangle fit inside the image; leave it untouched.
        return img

# Data Augmentation and Loading
# Training-time pipeline: pad-and-crop, horizontal flip, tensor conversion,
# then RandomErasing.  Because erasing is part of this pipeline, every model
# trained from `train_loader` sees randomly erased images.
transform_train = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        RandomErasing(probability=0.5, mean=[0.4914, 0.4822, 0.4465]),
    ]
)

# Test images are only converted to tensors.
transform_test = transforms.Compose([transforms.ToTensor()])

# CIFAR-100 train/test splits, stored under ./data (download requested).
train_dataset = datasets.CIFAR100(root='./data', train=True, transform=transform_train, download=True)
test_dataset = datasets.CIFAR100(root='./data', train=False, transform=transform_test, download=True)

# Mini-batches of 128; only the training loader shuffles.
train_loader = DataLoader(dataset=train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=128, shuffle=False)

# Training Function
def train(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader``.

    Args:
        model: Network to optimise; switched to training mode.
        loader: Iterable of ``(images, labels)`` batches that also supports
            ``len(loader)`` and exposes ``loader.dataset``.
        optimizer: Optimizer updating ``model``'s parameters.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(loss, accuracy)``: the sum of per-batch losses divided by the
        number of batches, and the number of correct argmax predictions
        divided by ``len(loader.dataset)``.
    """
    model.train()
    loss_sum = 0
    hits = 0
    for batch_inputs, batch_targets in loader:
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()
        logits = model(batch_inputs)
        batch_loss = criterion(logits, batch_targets)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()
        predictions = logits.argmax(dim=1)
        hits += predictions.eq(batch_targets).sum().item()

    mean_batch_loss = loss_sum / len(loader)
    accuracy = hits / len(loader.dataset)
    return mean_batch_loss, accuracy

# Testing Function
def test(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        loader: Iterable of ``(images, labels)`` batches that also supports
            ``len(loader)`` and exposes ``loader.dataset``.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(loss, accuracy)``: the sum of per-batch losses divided by the
        number of batches, and the number of correct argmax predictions
        divided by ``len(loader.dataset)``.
    """
    model.eval()
    loss_sum = 0
    hits = 0
    with torch.no_grad():
        for batch_inputs, batch_targets in loader:
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)

            logits = model(batch_inputs)
            loss_sum += criterion(logits, batch_targets).item()

            predictions = logits.argmax(dim=1)
            hits += predictions.eq(batch_targets).sum().item()

    mean_batch_loss = loss_sum / len(loader)
    accuracy = hits / len(loader.dataset)
    return mean_batch_loss, accuracy

# Evaluate Multiple Runs
def evaluate_multiple_runs(model, train_loader, test_loader, criterion, optimizer, device, num_runs=5, epochs=100, scheduler=None):
    """Train and test ``model`` ``num_runs`` times and summarise the test error.

    Each run starts from re-initialised weights and from the optimizer (and
    scheduler) state captured when this function was called, so momentum
    buffers and decayed learning rates from one run do not leak into the next.

    Args:
        model: Network to train; re-initialised via ``model.apply(reset_weights)``.
        train_loader: Loader passed to ``train`` once per epoch.
        test_loader: Loader passed to ``test`` once per run.
        criterion: Loss function forwarded to ``train`` and ``test``.
        optimizer: Optimizer bound to ``model``'s parameters.
        device: Device forwarded to ``train`` and ``test``.
        num_runs: Number of independent runs.
        epochs: Training epochs per run.
        scheduler: Optional learning-rate scheduler attached to ``optimizer``;
            when given it is stepped once after every epoch.

    Returns:
        Tuple ``(mean, std)`` of the per-run test error rates ``1 - accuracy``.
    """
    import copy  # local import keeps this function self-contained

    # Snapshots of the starting state, restored at the beginning of every run.
    initial_optimizer_state = copy.deepcopy(optimizer.state_dict())
    initial_scheduler_state = None
    if scheduler is not None:
        initial_scheduler_state = copy.deepcopy(scheduler.state_dict())

    error_rates = []
    for _ in range(num_runs):
        model.apply(reset_weights)  # Reset weights for each run
        # Load a copy so in-place optimizer updates cannot alter the snapshot.
        optimizer.load_state_dict(copy.deepcopy(initial_optimizer_state))
        if scheduler is not None:
            scheduler.load_state_dict(copy.deepcopy(initial_scheduler_state))

        for _ in range(epochs):
            train(model, train_loader, optimizer, criterion, device)
            if scheduler is not None:
                scheduler.step()

        _, test_acc = test(model, test_loader, criterion, device)
        error_rates.append(1 - test_acc)  # Calculate error rate
    return np.mean(error_rates), np.std(error_rates)

def reset_weights(module):
    """Re-initialise ``module`` in place when it defines ``reset_parameters``.

    Meant to be used as ``model.apply(reset_weights)``.  Checking for the
    method instead of a fixed list of layer types means normalisation layers
    are reset as well as convolutions and linear layers; for BatchNorm this
    restores the affine parameters and the running statistics, which would
    otherwise carry over between runs.  Modules without the method are left
    unchanged.
    """
    reset = getattr(module, "reset_parameters", None)
    if callable(reset):
        reset()

# Main Evaluation
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
models_to_test = {
    "ResNet-20": ResNet20(),
    "ResNet-32": ResNet32(),
    "ResNet-44": ResNet44(),
    "ResNet-56": ResNet56(),
    "ResNet-110": ResNet110(),
}
results = {}

# transform_train ends with RandomErasing, so train_loader cannot serve as a
# baseline.  Build a loader with the same crop/flip augmentation but no erasing.
baseline_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])
baseline_dataset = datasets.CIFAR100(root='./data', train=True, download=True, transform=baseline_transform)
baseline_loader = DataLoader(baseline_dataset, batch_size=128, shuffle=True)


def _run_scheduled_trials(model, loader, eval_loader, criterion, device, num_runs=5, epochs=100):
    """Return (mean, std) of the test error over independent, LR-scheduled runs.

    Every run re-initialises the model and creates a fresh SGD optimizer and
    StepLR scheduler, and the scheduler is stepped once per epoch so the
    learning rate is multiplied by 0.1 every 30 epochs.
    """
    error_rates = []
    for _ in range(num_runs):
        model.apply(reset_weights)
        optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
        for _ in range(epochs):
            train(model, loader, optimizer, criterion, device)
            scheduler.step()
        _, test_acc = test(model, eval_loader, criterion, device)
        error_rates.append(1 - test_acc)
    return np.mean(error_rates), np.std(error_rates)


for model_name, model in models_to_test.items():
    print(f"Evaluating {model_name}...")
    model.to(device)
    criterion = nn.CrossEntropyLoss()

    mean_baseline, std_baseline = _run_scheduled_trials(
        model, baseline_loader, test_loader, criterion, device, num_runs=5, epochs=100
    )
    results[model_name] = {"Baseline Error Rate": mean_baseline, "Baseline Std": std_baseline}

    print(f"{model_name} Baseline Error Rate: {mean_baseline:.4f} ± {std_baseline:.4f}")

# Print Results
print("\nResults Summary:")
for model_name, metrics in results.items():
    print(f"{model_name}: Baseline Error Rate: {metrics['Baseline Error Rate']:.4f}, Std: {metrics['Baseline Std']:.4f}")


Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


100%|██████████| 169M/169M [00:11<00:00, 14.6MB/s]


Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified
Evaluating ResNet-20 (Baseline)...


In [None]:
# Baseline pipeline: the crop/flip augmentation of transform_train without its
# final RandomErasing step, so the two conditions below actually differ.
baseline_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])
baseline_dataset = datasets.CIFAR100(root='./data', train=True, download=True, transform=baseline_transform)
baseline_loader = DataLoader(baseline_dataset, batch_size=128, shuffle=True)
# train_dataset uses transform_train, which ends with RandomErasing.
erasing_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)

for model_name, model in models_to_test.items():
    print(f"Evaluating {model_name}...")
    model.to(device)
    criterion = nn.CrossEntropyLoss()

    # Evaluate Baseline
    optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    mean_baseline, std_baseline = evaluate_multiple_runs(model, baseline_loader, test_loader, criterion, optimizer, device, num_runs=5, epochs=100)
    results[model_name] = {
        "Baseline Error Rate": mean_baseline,
        "Baseline Std": std_baseline
    }
    print(f"{model_name} Baseline Error Rate: {mean_baseline:.4f} ± {std_baseline:.4f}")

    # Evaluate with Random Erasing
    print(f"Evaluating {model_name} with Random Erasing...")
    # Fresh optimizer so momentum from the baseline runs is not reused.
    optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    mean_erasing, std_erasing = evaluate_multiple_runs(model, erasing_loader, test_loader, criterion, optimizer, device, num_runs=5, epochs=100)
    results[model_name]["Random Erasing Error Rate"] = mean_erasing
    results[model_name]["Random Erasing Std"] = std_erasing
    print(f"{model_name} Random Erasing Error Rate: {mean_erasing:.4f} ± {std_erasing:.4f}")
