### In the lecture you have been introduced to VGG16. For this problem your task is to implement a VGG like CNN architecture for classification on the CIFAR10 dataset.

In [13]:
import torch
import torchvision
import torch.optim as optim
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F

In [19]:
num_workers = 2
batch_size = 16
%config Completer.use_jedi = False

In [20]:
torch.cuda.init()

AssertionError: Torch not compiled with CUDA enabled

### 1. Load the dataset (0.5 point)
To load the dataset, you can use the inbuilt dataloader for CIFAR10 provided in the torchvision package. Load both test set and trainset separately. Define the transformations you might need to load the data appropriately.

In [21]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)

trainset = torchvision.datasets.CIFAR10(root='./CIFAR10', train = True,
                                        download = True, transform = transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size = batch_size, 
                                            shuffle = True, num_workers = num_workers)

testset = torchvision.datasets.CIFAR10(root='./CIFAR10', train = False,
                                        download = True, transform = transform)

testloader = torch.utils.data.DataLoader(testset, batch_size = batch_size, 
                                            shuffle = True, num_workers = num_workers)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [22]:
print(len(trainset))
print(trainset[0][0].size())

50000
torch.Size([3, 32, 32])


### Create the model architecture (1.0 point)
Implement the class below such that the final architecture follows the same pattern of layers as VGG16.

In [23]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1_1 = nn.Conv2d(3, 64, kernel_size = 3, stride = 1, padding = 1)
        self.conv1_2 = nn.Conv2d(64, 64, kernel_size = 3, stride = 1, padding = 1)
        self.conv2_1 = nn.Conv2d(64, 128, kernel_size = 3, stride = 1, padding = 1)
        self.conv2_2 = nn.Conv2d(128, 128, kernel_size = 3, stride = 1, padding = 1)
        self.conv3_1 = nn.Conv2d(128, 256, kernel_size = 3, stride = 1, padding = 1)
        self.conv3_2 = nn.Conv2d(256, 256, kernel_size = 3, stride = 1, padding = 1)
        self.max_pool = nn.MaxPool2d(2, stride = 2, padding = 0)
        self.fc1 = nn.Linear(256*4*4, 2048)
        self.fc2 = nn.Linear(2048, 512)
        self.fc3 = nn.Linear(512, 10)

    def forward(self, x):
        x = self.conv1_1(x)
        x = self.max_pool(self.conv1_2(x))
        x = self.conv2_1(x) 
        x = self.max_pool(self.conv2_2(x))
        x = self.conv3_1(x)
        x = self.max_pool(self.conv3_2(x))
        x = x.view(-1, 256*4*4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        out = self.fc3(x)
        return out


net = Net()#.cuda()

### Loss function and optimizer (0.5 point)
Define the loss function and optimizer to train the model.

In [24]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

### Train the model (1.0 point)

In [26]:
epochs = 1

for epoch in range(epochs): 
    running_loss = 0.0

    for i, data in enumerate(trainloader, 0):
        inputs, labels = data 

        # Reset gradient
        optimizer.zero_grad()

        # Feed forward 
        outputs = net.forward(inputs)
        loss = criterion(outputs, labels)

        # Backpropagation 
        loss.backward()
        optimizer.step()

        # Print statistics 
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
        

print('Finished Training')

NameError: name 'running_loss' is not defined

Code below generates the class wise accuracy of the model. You can use the results from the code below to decide the values of hyperparametrs.

In [None]:
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images = images.cuda()
        labels = labels.cuda()
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1


for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))