In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

train_dataset = datasets.CIFAR10(root="./data", train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_dataset = datasets.CIFAR10(root="./data", train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

100%|██████████| 170M/170M [00:05<00:00, 30.9MB/s]


In [3]:
class ResNetBottleneck(nn.Module):
    expansion = 4
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResNetBottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.conv3 = nn.Conv2d(out_channels, out_channels * self.expansion,
                               kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels * self.expansion)
        self.relu = nn.ReLU(inplace=True)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels * self.expansion:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * self.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * self.expansion)
            )

    def forward(self, x):
        identity = self.shortcut(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += identity
        out = self.relu(out)
        return out

In [4]:
class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride):
        strides = [stride] + [1]*(blocks-1)
        layers = []
        for s in strides:
            layers.append(block(self.in_channels, out_channels, s))
            self.in_channels = out_channels * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

def resnet50_cifar_manual():
    return ResNet(ResNetBottleneck, [3, 4, 6, 3])

In [5]:
class ResNeXtBottleneck(nn.Module):
    def __init__(self, in_channels, out_channels, stride, cardinality=32, base_width=4):
        super(ResNeXtBottleneck, self).__init__()
        D = cardinality * base_width
        self.conv1 = nn.Conv2d(in_channels, D , kernel_size=1, bias = False)
        self.bn1 = nn.BatchNorm2d(D)
        self.conv2 = nn.Conv2d(D , D , kernel_size=3, stride=stride, padding=1,
                               groups=cardinality, bias= False)
        self.bn2 = nn.BatchNorm2d(D)
        self.conv3 = nn.Conv2d(D , out_channels , kernel_size=1, bias = False)
        self.bn3 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels))

    def forward(self, x):
        identity = self.shortcut(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += identity
        out = self.relu(out)
        return out

class ResNeXt(nn.Module):
    def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10):
        super(ResNeXt, self).__init__()
        self.in_channels = 64
        self.cardinality = cardinality
        self.base_width = bottleneck_width
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(256, num_blocks[0] , stride=1)
        self.layer2 = self._make_layer(512, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(1024, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(2048, num_blocks[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1 , 1))
        self.fc = nn.Linear(2048, num_classes)

    def _make_layer(self, out_channels, blocks, stride):
        layers = []
        layers.append(ResNeXtBottleneck(self.in_channels, out_channels, stride, self.cardinality, self.base_width))
        self.in_channels = out_channels
        for _ in range(1, blocks):
            layers.append(ResNeXtBottleneck(out_channels, out_channels, 1, self.cardinality, self.base_width))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

def resnext50_cifar():
    return ResNeXt(num_blocks=[3, 4, 6, 3], cardinality=32, bottleneck_width=4)

In [6]:
def run_experiment(epochs, device):
    for name, model_func in [("ResNet-50", resnet50_cifar_manual), ("ResNeXt-50", resnext50_cifar)]:
        model = model_func().to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)

        print(f"\n>>> Start Training: {name} for {epochs} Epochs")
        print("-" * 50)

        for epoch in range(epochs):
            model.train()
            running_loss = 0.0
            pbar = tqdm(train_loader, desc=f"{name} Epoch {epoch+1}/{epochs}")
            for images, labels in pbar:
                images, labels = images.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()

            avg_train_loss = running_loss / len(train_loader)

            model.eval()
            correct, total = 0, 0
            with torch.no_grad():
                for images, labels in test_loader:
                    images, labels = images.to(device), labels.to(device)
                    outputs = model(images)
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()

            accuracy = 100 * correct / total
            print(f"[Epoch {epoch+1} Results] Loss: {avg_train_loss:.4f} | Test Accuracy: {accuracy:.2f}%")

        print(f"\n>>> Finished {name} training.\n")

In [7]:
results_1 = run_experiment(epochs=1 , device=device)


>>> Start Training: ResNet-50 for 1 Epochs
--------------------------------------------------


ResNet-50 Epoch 1/1: 100%|██████████| 391/391 [02:38<00:00,  2.46it/s]


[Epoch 1 Results] Loss: 1.7408 | Test Accuracy: 51.11%

>>> Finished ResNet-50 training.


>>> Start Training: ResNeXt-50 for 1 Epochs
--------------------------------------------------


ResNeXt-50 Epoch 1/1: 100%|██████████| 391/391 [02:21<00:00,  2.76it/s]


[Epoch 1 Results] Loss: 1.9723 | Test Accuracy: 45.91%

>>> Finished ResNeXt-50 training.



In [8]:
results_5 = run_experiment(epochs=5, device=device)


>>> Start Training: ResNet-50 for 5 Epochs
--------------------------------------------------


ResNet-50 Epoch 1/5: 100%|██████████| 391/391 [02:41<00:00,  2.42it/s]


[Epoch 1 Results] Loss: 1.8738 | Test Accuracy: 50.24%


ResNet-50 Epoch 2/5: 100%|██████████| 391/391 [02:41<00:00,  2.42it/s]


[Epoch 2 Results] Loss: 1.1850 | Test Accuracy: 58.10%


ResNet-50 Epoch 3/5: 100%|██████████| 391/391 [02:41<00:00,  2.42it/s]


[Epoch 3 Results] Loss: 0.8817 | Test Accuracy: 71.45%


ResNet-50 Epoch 4/5: 100%|██████████| 391/391 [02:41<00:00,  2.42it/s]


[Epoch 4 Results] Loss: 0.6594 | Test Accuracy: 74.39%


ResNet-50 Epoch 5/5: 100%|██████████| 391/391 [02:41<00:00,  2.42it/s]


[Epoch 5 Results] Loss: 0.5014 | Test Accuracy: 76.74%

>>> Finished ResNet-50 training.


>>> Start Training: ResNeXt-50 for 5 Epochs
--------------------------------------------------


ResNeXt-50 Epoch 1/5: 100%|██████████| 391/391 [02:21<00:00,  2.76it/s]


[Epoch 1 Results] Loss: 2.0605 | Test Accuracy: 44.80%


ResNeXt-50 Epoch 2/5: 100%|██████████| 391/391 [02:21<00:00,  2.76it/s]


[Epoch 2 Results] Loss: 1.3747 | Test Accuracy: 55.02%


ResNeXt-50 Epoch 3/5: 100%|██████████| 391/391 [02:21<00:00,  2.76it/s]


[Epoch 3 Results] Loss: 1.0405 | Test Accuracy: 64.05%


ResNeXt-50 Epoch 4/5: 100%|██████████| 391/391 [02:21<00:00,  2.76it/s]


[Epoch 4 Results] Loss: 0.8230 | Test Accuracy: 63.81%


ResNeXt-50 Epoch 5/5: 100%|██████████| 391/391 [02:21<00:00,  2.76it/s]


[Epoch 5 Results] Loss: 0.6635 | Test Accuracy: 73.44%

>>> Finished ResNeXt-50 training.



In [10]:
results_10 = run_experiment(epochs = 10, device=device)


>>> Start Training: ResNet-50 for 10 Epochs
--------------------------------------------------


ResNet-50 Epoch 1/10: 100%|██████████| 391/391 [02:41<00:00,  2.42it/s]


[Epoch 1 Results] Loss: 1.7904 | Test Accuracy: 48.76%


ResNet-50 Epoch 2/10: 100%|██████████| 391/391 [02:41<00:00,  2.42it/s]


[Epoch 2 Results] Loss: 1.2089 | Test Accuracy: 64.26%


ResNet-50 Epoch 3/10: 100%|██████████| 391/391 [02:41<00:00,  2.42it/s]


[Epoch 3 Results] Loss: 0.8832 | Test Accuracy: 69.29%


ResNet-50 Epoch 4/10: 100%|██████████| 391/391 [02:41<00:00,  2.42it/s]


[Epoch 4 Results] Loss: 0.6507 | Test Accuracy: 72.64%


ResNet-50 Epoch 5/10: 100%|██████████| 391/391 [02:41<00:00,  2.42it/s]


[Epoch 5 Results] Loss: 0.4922 | Test Accuracy: 77.02%


ResNet-50 Epoch 6/10: 100%|██████████| 391/391 [02:41<00:00,  2.42it/s]


[Epoch 6 Results] Loss: 0.3754 | Test Accuracy: 73.75%


ResNet-50 Epoch 7/10: 100%|██████████| 391/391 [02:41<00:00,  2.42it/s]


[Epoch 7 Results] Loss: 0.2767 | Test Accuracy: 76.10%


ResNet-50 Epoch 8/10: 100%|██████████| 391/391 [02:41<00:00,  2.42it/s]


[Epoch 8 Results] Loss: 0.2180 | Test Accuracy: 78.60%


ResNet-50 Epoch 9/10: 100%|██████████| 391/391 [02:41<00:00,  2.42it/s]


[Epoch 9 Results] Loss: 0.1671 | Test Accuracy: 77.88%


ResNet-50 Epoch 10/10: 100%|██████████| 391/391 [02:41<00:00,  2.42it/s]


[Epoch 10 Results] Loss: 0.1373 | Test Accuracy: 78.79%

>>> Finished ResNet-50 training.


>>> Start Training: ResNeXt-50 for 10 Epochs
--------------------------------------------------


ResNeXt-50 Epoch 1/10: 100%|██████████| 391/391 [02:21<00:00,  2.76it/s]


[Epoch 1 Results] Loss: 1.9473 | Test Accuracy: 42.66%


ResNeXt-50 Epoch 2/10: 100%|██████████| 391/391 [02:21<00:00,  2.76it/s]


[Epoch 2 Results] Loss: 1.2811 | Test Accuracy: 60.96%


ResNeXt-50 Epoch 3/10: 100%|██████████| 391/391 [02:21<00:00,  2.76it/s]


[Epoch 3 Results] Loss: 0.9467 | Test Accuracy: 68.19%


ResNeXt-50 Epoch 4/10: 100%|██████████| 391/391 [02:21<00:00,  2.76it/s]


[Epoch 4 Results] Loss: 0.7404 | Test Accuracy: 72.95%


ResNeXt-50 Epoch 5/10: 100%|██████████| 391/391 [02:21<00:00,  2.76it/s]


[Epoch 5 Results] Loss: 0.5993 | Test Accuracy: 73.73%


ResNeXt-50 Epoch 6/10: 100%|██████████| 391/391 [02:21<00:00,  2.76it/s]


[Epoch 6 Results] Loss: 0.4782 | Test Accuracy: 75.88%


ResNeXt-50 Epoch 7/10: 100%|██████████| 391/391 [02:21<00:00,  2.76it/s]


[Epoch 7 Results] Loss: 0.3820 | Test Accuracy: 76.29%


ResNeXt-50 Epoch 8/10: 100%|██████████| 391/391 [02:21<00:00,  2.76it/s]


[Epoch 8 Results] Loss: 0.3108 | Test Accuracy: 76.24%


ResNeXt-50 Epoch 9/10: 100%|██████████| 391/391 [02:21<00:00,  2.76it/s]


[Epoch 9 Results] Loss: 0.2483 | Test Accuracy: 75.76%


ResNeXt-50 Epoch 10/10: 100%|██████████| 391/391 [02:21<00:00,  2.76it/s]


[Epoch 10 Results] Loss: 0.2166 | Test Accuracy: 76.75%

>>> Finished ResNeXt-50 training.



In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

Device: cuda
