<a href="https://colab.research.google.com/github/bhanup6663/COMP691_DL/blob/main/cifar10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import pandas as pd

# Seed every random number generator used in this file (Python's `random`,
# NumPy, and PyTorch) with the same value so repeated runs draw the same
# random numbers.
SEED = 123
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Define Basic Block for ResNet
class BasicResidualBlock(nn.Module):
    """Residual block made of two 3x3 convolution + batch-norm layers.

    The block output is ``relu(main_path(x) + shortcut_path(x))``.  The
    shortcut is an identity (an empty ``nn.Sequential``) when the main path
    keeps both the channel count and the spatial size; otherwise it is a
    strided 1x1 convolution followed by batch norm so the two paths can be
    added.

    Args:
        input_dim: Number of input channels.
        output_dim: Number of output channels.
        step: Stride of the first convolution (and of the 1x1 shortcut
            convolution when one is needed).
    """

    def __init__(self, input_dim, output_dim, step=1):
        super().__init__()

        # Main path: the first conv may change channels/stride, the second
        # one always keeps the shape.
        self.conv_main = nn.Conv2d(
            input_dim, output_dim, kernel_size=3, stride=step, padding=1, bias=False
        )
        self.batch_norm_main = nn.BatchNorm2d(output_dim)
        self.conv_second = nn.Conv2d(
            output_dim, output_dim, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.batch_norm_second = nn.BatchNorm2d(output_dim)

        # Shortcut path: stays empty (identity) unless the main path alters
        # the tensor shape.
        shape_preserved = step == 1 and input_dim == output_dim
        projection_layers = []
        if not shape_preserved:
            projection_layers = [
                nn.Conv2d(input_dim, output_dim, kernel_size=1, stride=step, bias=False),
                nn.BatchNorm2d(output_dim),
            ]
        self.shortcut_path = nn.Sequential(*projection_layers)

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut_path(x))``."""
        skip = self.shortcut_path(x)

        out = self.conv_main(x)
        out = torch.relu(self.batch_norm_main(out))
        out = self.conv_second(out)
        out = self.batch_norm_second(out)

        out += skip
        return torch.relu(out)

# Define ResNet Architecture
class ResidualNetArchitecture(nn.Module):
    """Three-stage residual network with 16, 32 and 64 filters per stage.

    The network is a 3x3 stem convolution (3 -> 16 channels), three stages of
    residual blocks (the second and third stage start with a stride-2 block),
    global average pooling and a linear classifier.

    Args:
        block: Callable ``block(in_channels, out_channels, stride)`` returning
            an ``nn.Module``; used to build every residual block.
        block_counts: Sequence whose first three entries give the number of
            blocks in stage 1, 2 and 3.
        num_classes: Size of the classifier output.
    """

    def __init__(self, block, block_counts, num_classes=100):
        super().__init__()
        # Running channel count fed to the next block; updated by _build_layer.
        self.init_filters = 16
        self.conv_start = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn_start = nn.BatchNorm2d(16)
        self.residual_layer1 = self._build_layer(block, 16, block_counts[0], step=1)
        self.residual_layer2 = self._build_layer(block, 32, block_counts[1], step=2)
        self.residual_layer3 = self._build_layer(block, 64, block_counts[2], step=2)
        self.final_fc = nn.Linear(64, num_classes)

    def _build_layer(self, block, filters, num_blocks, step):
        """Build one stage: the first block uses ``step``, the rest stride 1.

        Note that at least one block is always created, even when
        ``num_blocks`` is less than 1.
        """
        steps = [step] + [1] * (num_blocks - 1)
        layers = []
        for s in steps:
            layers.append(block(self.init_filters, filters, s))
            # Every block after the first one in a stage sees `filters` inputs.
            self.init_filters = filters
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        x = torch.relu(self.bn_start(self.conv_start(x)))
        x = self.residual_layer1(x)
        x = self.residual_layer2(x)
        x = self.residual_layer3(x)
        # Global average pooling.  A fixed 8x8 window is only correct for
        # 32x32 inputs; the adaptive form gives the same result there and
        # always produces exactly 64 features for any other input size.
        x = nn.functional.adaptive_avg_pool2d(x, 1)
        x = torch.flatten(x, 1)
        return self.final_fc(x)

# Define ResNet Variants
def ResNet44Custom(num_classes=100):
    """Build the ResNet-44 variant: 7 basic residual blocks in each of the 3 stages."""
    blocks_per_stage = [7] * 3
    return ResidualNetArchitecture(BasicResidualBlock, blocks_per_stage, num_classes)

def ResNet56Custom(num_classes=100):
    """Build the ResNet-56 variant: 9 basic residual blocks in each of the 3 stages."""
    blocks_per_stage = [9] * 3
    return ResidualNetArchitecture(BasicResidualBlock, blocks_per_stage, num_classes)

def ResNet110Custom(num_classes=100):
    """Build the ResNet-110 variant: 18 basic residual blocks in each of the 3 stages."""
    blocks_per_stage = [18] * 3
    return ResidualNetArchitecture(BasicResidualBlock, blocks_per_stage, num_classes)

# Define Random Erasing Augmentation
class DataAugmentRandomErase:
    """Random-erasing augmentation: overwrite a random rectangle with a fill value.

    The transform is applied to a tensor indexed as ``img[channel, row, col]``
    and modifies it in place.

    Args:
        probability: Chance of attempting an erase on a given call.
        sl: Lower bound of the erased area, as a fraction of the image area.
        sh: Upper bound of the erased area, as a fraction of the image area.
        r1: Lower bound of the rectangle's height/width ratio; the upper
            bound is ``1 / r1``.
        mean_values: Per-channel fill values written into the rectangle.
            Assumed to have one entry per image channel (or a single entry)
            so it can broadcast over the erased region.
    """

    def __init__(self, probability=0.5, sl=0.02, sh=0.4, r1=0.3, mean_values=(0.5, 0.5, 0.5)):
        self.prob = probability
        self.sl = sl
        self.sh = sh
        self.r1 = r1
        # Store a private copy: the default is immutable and a caller's list
        # is never aliased, so instances cannot affect each other's fill.
        self.mean = list(mean_values)

    def __call__(self, img):
        """Return ``img``, possibly with one rectangle overwritten in place."""
        if random.uniform(0, 1) > self.prob:
            return img

        # The image size does not change between attempts, so read it once.
        height, width = img.size()[1], img.size()[2]
        area = height * width

        # Sample candidate rectangles until one fits strictly inside the
        # image; give up after 100 attempts and return the image unchanged.
        for _ in range(100):
            target_area = random.uniform(self.sl, self.sh) * area
            aspect_ratio = random.uniform(self.r1, 1 / self.r1)
            h = int(round(np.sqrt(target_area * aspect_ratio)))
            w = int(round(np.sqrt(target_area / aspect_ratio)))
            if h < height and w < width:
                top = random.randint(0, height - h)
                left = random.randint(0, width - w)
                # Shape (C, 1, 1) broadcasts over the erased rows and columns.
                img[:, top:top + h, left:left + w] = torch.tensor(self.mean).view(-1, 1, 1)
                return img
        return img

# Data Preparation
# Training pipeline: random crop and flip first, then conversion to a tensor,
# and finally random erasing (which indexes its input as a tensor, so it has
# to come after ToTensor).
_train_steps = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    DataAugmentRandomErase(probability=0.5, mean_values=[0.5, 0.5, 0.5]),
]
train_transforms = transforms.Compose(_train_steps)

# Test images are only converted to tensors; no augmentation is applied.
test_transforms = transforms.Compose([transforms.ToTensor()])

# CIFAR-100 train/test splits, downloaded into ./data when missing.
_cifar100_common = dict(root='./data', download=True)
training_dataset = datasets.CIFAR100(train=True, transform=train_transforms, **_cifar100_common)
testing_dataset = datasets.CIFAR100(train=False, transform=test_transforms, **_cifar100_common)

# Batches of 128; only the training loader shuffles.
training_loader = DataLoader(training_dataset, shuffle=True, batch_size=128)
testing_loader = DataLoader(testing_dataset, shuffle=False, batch_size=128)

# Training Function
def perform_training(model, loader, optimizer, criterion, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: Network to optimise; switched to training mode.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(predictions, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_batch_loss, accuracy)``: the average of the per-batch
        loss values and the fraction of processed samples whose arg-max
        prediction matched the label.

    Raises:
        ZeroDivisionError: If ``loader`` yields no batches.
    """
    model.train()
    cumulative_loss, total_correct = 0, 0
    # Count what was actually processed instead of trusting
    # len(loader.dataset): the two differ when the loader drops the last
    # batch or samples a subset, which would understate the accuracy.
    batches_seen, samples_seen = 0, 0
    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        predictions = model(inputs)
        loss = criterion(predictions, labels)
        loss.backward()
        optimizer.step()
        cumulative_loss += loss.item()
        total_correct += (predictions.argmax(1) == labels).sum().item()
        batches_seen += 1
        samples_seen += labels.size(0)
    return cumulative_loss / batches_seen, total_correct / samples_seen

# Evaluation Function
def perform_evaluation(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without computing gradients.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss called as ``criterion(predictions, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_batch_loss, error_rate)``: the average of the per-batch
        loss values and ``1 - accuracy`` over the processed samples.

    Raises:
        ZeroDivisionError: If ``loader`` yields no batches.
    """
    model.eval()
    cumulative_loss, total_correct = 0, 0
    # Count what was actually processed instead of trusting
    # len(loader.dataset): the two differ when the loader drops the last
    # batch or samples a subset, which would inflate the error rate.
    batches_seen, samples_seen = 0, 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            predictions = model(inputs)
            loss = criterion(predictions, labels)
            cumulative_loss += loss.item()
            total_correct += (predictions.argmax(1) == labels).sum().item()
            batches_seen += 1
            samples_seen += labels.size(0)
    accuracy = total_correct / samples_seen
    error_rate = 1 - accuracy
    return cumulative_loss / batches_seen, error_rate

# Experiment driver: train every ResNet variant on the training loader,
# evaluate it on the test loader after each epoch, and record the test error
# measured after the final epoch.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# One freshly initialised network per depth; insertion order is the order in
# which the networks are trained.
custom_models = {}
custom_models["ResNet-44"] = ResNet44Custom()
custom_models["ResNet-56"] = ResNet56Custom()
custom_models["ResNet-110"] = ResNet110Custom()

results_summary = []

# Schedule: 100 epochs, with the learning rate scaled by 0.1 at the
# milestones below (the scheduler is stepped once per epoch).
num_epochs = 100
milestones = [50, 75]

for model_name in custom_models:
    network = custom_models[model_name]
    print(f"Training {model_name}...")
    network.to(device)

    # Each network gets its own loss, optimizer and learning-rate schedule.
    loss_func = nn.CrossEntropyLoss()
    opt = optim.SGD(network.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.MultiStepLR(opt, milestones=milestones, gamma=0.1)

    for epoch in range(num_epochs):
        train_loss, train_accuracy = perform_training(network, training_loader, opt, loss_func, device)
        test_loss, test_error = perform_evaluation(network, testing_loader, loss_func, device)
        scheduler.step()

        # Report only every tenth epoch to keep the log short.
        if (epoch + 1) % 10 != 0:
            continue
        print(f"Epoch {epoch + 1}/{num_epochs}: Test Error = {test_error * 100:.2f}%")

    # test_error still holds the value from this network's last epoch.
    results_summary.append({"Model": model_name, "Baseline Error (%)": test_error * 100})

# Tabulate the per-model results, show them, and persist them as CSV.
results_dataframe = pd.DataFrame(results_summary)
print(results_dataframe)
results_dataframe.to_csv("results_100_epochs.csv", index=False)
