### CIFAR-10 ResNeXt in PyTorch (trained from scratch)

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [3]:
# Define ResNeXT block
class ResNeXtBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride, cardinality, bottleneck_width):
        super(ResNeXtBlock, self).__init__()
        self.cardinality = cardinality
        self.bottleneck_width = bottleneck_width
        self.group_width = cardinality * bottleneck_width

        self.conv1 = nn.Conv2d(in_channels, self.group_width, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(self.group_width)
        self.conv2 = nn.Conv2d(self.group_width, self.group_width, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False)
        self.bn2 = nn.BatchNorm2d(self.group_width)
        self.conv3 = nn.Conv2d(self.group_width, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

In [4]:
# Define ResNeXT model
class ResNeXt(nn.Module):
    def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10):
        super(ResNeXt, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(64, num_blocks[0], 1, cardinality, bottleneck_width)
        self.layer2 = self._make_layer(128, num_blocks[1], 2, cardinality, bottleneck_width)
        self.layer3 = self._make_layer(256, num_blocks[2], 2, cardinality, bottleneck_width)
        self.layer4 = self._make_layer(512, num_blocks[3], 2, cardinality, bottleneck_width)
        self.linear = nn.Linear(512, num_classes)

    def _make_layer(self, out_channels, num_blocks, stride, cardinality, bottleneck_width):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(ResNeXtBlock(self.in_channels, out_channels, stride, cardinality, bottleneck_width))
            self.in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

In [5]:
# Load CIFAR-10 dataset
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)


Files already downloaded and verified
Files already downloaded and verified


In [10]:
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [6]:
# Instantiate the ResNeXT model
net = ResNeXt(num_blocks=[3, 4, 6, 3], cardinality=32, bottleneck_width=4) #ResNeXt29_32x4d

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

In [9]:
def count_params(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

# Bare last expression: the cell output is the number of trainable parameters in `net`.
count_params(net)

1179594

In [7]:
# Training loop
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)

for epoch in range(10):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 200 == 199:    # print every 200 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

print('Finished Training')

[1,   200] loss: 1.798
[2,   200] loss: 1.238
[3,   200] loss: 0.992
[4,   200] loss: 0.830
[5,   200] loss: 0.726
[6,   200] loss: 0.628
[7,   200] loss: 0.555
[8,   200] loss: 0.500
[9,   200] loss: 0.464
[10,   200] loss: 0.429
Finished Training


In [8]:
# Test the network on the test data
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

Accuracy of the network on the 10000 test images: 81 %


In [11]:
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4): #batch size is 100, so we use 4 for batch size of 100
            try:
                label = labels[i].item()
                class_correct[label] += c[i].item()
                class_total[label] += 1
            except IndexError:
                pass #Last batch may be smaller.

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

Accuracy of plane : 88 %
Accuracy of   car : 93 %
Accuracy of  bird : 60 %
Accuracy of   cat : 72 %
Accuracy of  deer : 81 %
Accuracy of   dog : 78 %
Accuracy of  frog : 84 %
Accuracy of horse : 93 %
Accuracy of  ship : 81 %
Accuracy of truck : 89 %
