In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import matplotlib.pyplot as plt
import numpy as np
import time

########################################
# Define any helper classes you need here
########################################

class ParallelBlock(nn.Module):
    """Residual block: two BatchNorm -> ReLU -> 3x3 Conv steps plus a shortcut.

    Args:
        in_ch: number of channels of the incoming feature map.
        _is_firstblock: when truthy, the block doubles the channel count and
            halves the spatial resolution (stride-2 first convolution), and the
            shortcut is a stride-2 1x1 convolution applied to the activated
            input. Otherwise channels and resolution are kept and the shortcut
            is the unmodified input.
    """

    def __init__(self, in_ch, _is_firstblock):
        super().__init__()
        self._is_firstblock = _is_firstblock

        # Only the first block of a stage changes width / resolution.
        out_ch = in_ch * 2 if _is_firstblock else in_ch
        first_stride = 2 if _is_firstblock else 1

        # Submodules are registered in the same order for both variants
        # (skip_Conv sits between Conv1 and BN2 when present).
        self.BN1 = nn.BatchNorm2d(in_ch)
        self.Conv1 = nn.Conv2d(in_ch, out_ch, 3, stride=first_stride, padding=1)
        if _is_firstblock:
            self.skip_Conv = nn.Conv2d(in_ch, out_ch, 1, stride=2)
        self.BN2 = nn.BatchNorm2d(out_ch)
        self.Conv2 = nn.Conv2d(out_ch, out_ch, 3, stride=1, padding=1)
        self.Relu = nn.ReLU()

    def forward(self, x):
        """Return the sum of the two-convolution branch and the shortcut."""
        activated = self.Relu(self.BN1(x))

        # The projection shortcut consumes the activated input; the identity
        # shortcut consumes the raw input.
        if self._is_firstblock:
            shortcut = self.skip_Conv(activated)
        else:
            shortcut = x

        branch = self.Conv1(activated)
        branch = self.Conv2(self.Relu(self.BN2(branch)))
        return branch + shortcut



class IdentityResNet(nn.Module):
    """Residual CNN: a 3x3 stem convolution, four stages of ParallelBlock, a
    4x4 average pool and a 512 -> 10 linear classifier.

    Stage 1 works on 64 channels. Each of stages 2-4 starts with a
    ParallelBlock built with ``_is_firstblock=True`` and continues with blocks
    on the doubled channel count (128, 256, 512).

    Note: the hard-coded channel counts only line up if stages 2-4 each contain
    at least one block, and the 512-feature classifier expects the pooled map
    to be 1x1 (which is the case for 32x32 inputs).
    """

    # __init__ takes 4 parameters
    # nblk_stage1: number of blocks in stage 1, nblk_stage2.. similar
    def __init__(self, nblk_stage1, nblk_stage2, nblk_stage3, nblk_stage4):
        """Build the stem, the residual stages and the classifier.

        Args:
            nblk_stage1: number of blocks in stage 1 (64 channels).
            nblk_stage2: number of blocks in stage 2 (64 -> 128 channels).
            nblk_stage3: number of blocks in stage 3 (128 -> 256 channels).
            nblk_stage4: number of blocks in stage 4 (256 -> 512 channels).
        """
        super(IdentityResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, 3, stride=1, padding=1)

        # Stage 1 keeps the stem's width, so it has no projection block.
        blocks = [ParallelBlock(64, False) for _ in range(nblk_stage1)]

        # Stages 2-4: (input channels, number of blocks). The first block of
        # each stage doubles the channels; the rest run on the doubled width.
        later_stages = ((64, nblk_stage2), (128, nblk_stage3), (256, nblk_stage4))
        for in_ch, n_blocks in later_stages:
            for idx in range(n_blocks):
                if idx == 0:
                    blocks.append(ParallelBlock(in_ch, True))
                else:
                    blocks.append(ParallelBlock(in_ch * 2, False))

        self.wholeBlock = nn.Sequential(*blocks)
        self.fc = nn.Linear(512, 10)

    def forward(self, x):
        """Compute class scores.

        Args:
            x: image batch with 3 channels.

        Returns:
            Tensor of shape (batch, 10). The batch axis is always kept, also
            for a batch of one.
        """
        stem = self.conv1(x)
        features = self.wholeBlock(stem)
        pooled = F.avg_pool2d(features, 4, stride=4)
        # Flatten everything except the batch axis. A bare torch.squeeze()
        # would also drop the batch axis when the batch holds one sample.
        return self.fc(torch.flatten(pooled, 1))

In [None]:
########################################
# Q1. set device
# Use the first CUDA GPU ("cuda:0") when one is available,
# otherwise fall back to the CPU.
########################################
if torch.cuda.is_available():
    dev = torch.device('cuda:0')
else:
    dev = torch.device('cpu')
print('current device: ', dev)


########################################
# data preparation: CIFAR10
########################################

########################################
# Q2. set batch size
# mini-batch size used for the training loader
########################################
batch_size = 10

# preprocessing: convert to tensor, then normalise each of the three
# channels with mean 0.5 and std 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# training split, reshuffled by the loader
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=True)

# test split, read in order in batches of 4
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=4, shuffle=False)

# class names, in label-index order
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


# define network: two blocks in each of the four stages
net = IdentityResNet(2, 2, 2, 2)

########################################
# Q3. load model to GPU
# Move the model's parameters and buffers to the selected device
########################################
net = net.to(dev)


# set loss function
criterion = nn.CrossEntropyLoss()

########################################
# Q4. optimizer
# SGD with momentum 0.9 and learning rate 0.01
########################################
optimizer = optim.SGD(params=net.parameters(), lr=0.01, momentum=0.9)


current device:  cuda:0
Files already downloaded and verified
Files already downloaded and verified


In [None]:

# start training
NUM_EPOCHS = 5      # passes over the training set
LOG_EVERY = 2000    # report the running loss every LOG_EVERY mini-batches

# Put BatchNorm layers in training mode explicitly, so the cell also behaves
# correctly when re-run after the model was switched to evaluation mode.
net.train()

t_start = time.time()

for epoch in range(NUM_EPOCHS):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(dev), data[1].to(dev)

        ########################################
        # Q5. make sure gradients are zero!
        # zero the parameter gradients
        ########################################
        optimizer.zero_grad()

        ########################################
        # Q6. perform forward pass
        ########################################
        outputs = net(inputs)
        # set loss
        loss = criterion(outputs, labels)

        ########################################
        # Q7. perform backprop
        ########################################
        loss.backward()

        ########################################
        # Q8. take a SGD step
        ########################################
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % LOG_EVERY == LOG_EVERY - 1:    # print every LOG_EVERY mini-batches
            print('[%d, %5d] loss: %.3f' %(epoch + 1, i + 1, running_loss / LOG_EVERY))
            running_loss = 0.0
            # The timer is only reset here. If an epoch's batch count is not a
            # multiple of LOG_EVERY, the next epoch's first "elapsed" value
            # also covers the unreported tail of this epoch, while that tail's
            # loss is dropped when running_loss is reset at the epoch start.
            t_end = time.time()
            print('elapsed:', t_end-t_start, ' sec')
            t_start = t_end

print('Finished Training')


[1,  2000] loss: 0.323
elapsed: 126.688321352005  sec
[1,  4000] loss: 0.329
elapsed: 126.43262600898743  sec
[2,  2000] loss: 0.248
elapsed: 189.67149567604065  sec
[2,  4000] loss: 0.271
elapsed: 126.07529067993164  sec
[3,  2000] loss: 0.197
elapsed: 187.3860740661621  sec
[3,  4000] loss: 0.214
elapsed: 124.66136884689331  sec
[4,  2000] loss: 0.150
elapsed: 187.19327640533447  sec
[4,  4000] loss: 0.161
elapsed: 124.92330718040466  sec
[5,  2000] loss: 0.108
elapsed: 186.99386382102966  sec
[5,  4000] loss: 0.126
elapsed: 124.6551501750946  sec
Finished Training


In [None]:

# now testing
# Per-class counters, indexed by label; kept as floats so the printed
# percentages are floats.
class_correct = [0.0] * len(classes)
class_total = [0.0] * len(classes)

########################################
# Q9. complete below
# when testing, computation is done without building graphs
########################################
# Evaluate in eval mode so BatchNorm uses its running statistics rather than
# the statistics of each small test batch (and is not updated by test data).
# The previous mode is restored afterwards so later cells see the same state.
was_training = net.training
net.eval()
try:
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(dev), data[1].to(dev)
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            hits = predicted == labels
            # Iterate over the actual batch contents instead of assuming a
            # batch size of 4; this also handles a smaller final batch.
            for label, hit in zip(labels.tolist(), hits.tolist()):
                class_correct[label] += hit
                class_total[label] += 1
finally:
    net.train(was_training)

# per-class accuracy
for i in range(len(classes)):
    print('Accuracy of %5s' %(classes[i]), ': ',
          100 * class_correct[i] / class_total[i],'%')

# overall accuracy
print('Overall Accurracy: ', (sum(class_correct)/sum(class_total))*100, '%')


Accuracy of plane :  91.8 %
Accuracy of   car :  90.4 %
Accuracy of  bird :  71.8 %
Accuracy of   cat :  66.3 %
Accuracy of  deer :  80.3 %
Accuracy of   dog :  50.7 %
Accuracy of  frog :  85.6 %
Accuracy of horse :  84.7 %
Accuracy of  ship :  88.0 %
Accuracy of truck :  89.9 %
Overall Accurracy:  79.95 %
