In [5]:
'''
This is starter code for Assignment 2 Problem 1 of CMPT 726 Fall 2020.
The file is adapted from the repo https://github.com/chenyaofo/CIFAR-pretrained-models
'''

import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import torch
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
import sys
NUM_EPOCH = 8

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 ``nn.Conv2d`` padded by one pixel on every side."""
    # With kernel 3 and padding 1 the spatial size is preserved when stride is 1.
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)


def conv1x1(in_planes, out_planes, stride=1):
    """Build a bias-free 1x1 (pointwise) ``nn.Conv2d`` with the given stride."""
    pointwise = nn.Conv2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
    )
    return pointwise


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 conv + batch-norm layers and a skip connection.

    ``downsample``, when given, is applied to the block input so that the skip
    connection matches the shape produced by the convolutional branch.
    """

    # Multiplier applied to ``planes`` by the enclosing network to obtain the
    # block's output channel count.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        # Only the first convolution carries the stride; the second keeps the size.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(branch(x) + shortcut(x))``."""
        branch = self.relu(self.bn1(self.conv1(x)))
        branch = self.bn2(self.conv2(branch))

        # Identity shortcut unless a projection module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        branch += shortcut
        return self.relu(branch)


class CifarResNet(nn.Module):
    """Residual network with a 3x3 stem, three residual stages and a linear classifier.

    The stem maps 3 input channels to 16. The three stages use 16, 32 and 64
    base planes; the second and third stage start with a stride-2 block.
    The result is globally average-pooled to 1x1 and fed to ``fc``.

    Args:
        block: Residual block class. It must expose an ``expansion`` attribute
            and be constructible as ``block(inplanes, planes, stride, downsample)``.
        layers: Indexable with three entries giving the number of blocks in
            stage 1, 2 and 3.
        num_classes: Number of outputs of the final linear layer (default 100).
    """

    def __init__(self, block, layers, num_classes=100):
        super(CifarResNet, self).__init__()
        # Running channel count; _make_layer reads and updates it stage by stage.
        self.inplanes = 16
        self.conv1 = conv3x3(3, 16)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)

        self.layer1 = self._make_layer(block, 16, layers[0])
        self.layer2 = self._make_layer(block, 32, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 64, layers[2], stride=2)

        # Collapse whatever spatial size remains to a single 1x1 cell per channel.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64 * block.expansion, num_classes)

        # Re-initialise every conv with Kaiming-normal (fan_out, ReLU gain) and
        # every batch-norm to scale 1 / shift 0; the linear layer keeps its default init.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks as an ``nn.Sequential``.

        Args:
            block: Residual block class (see the class docstring).
            planes: Base channel count of the stage.
            blocks: Number of blocks in the stage.
            stride: Stride of the first block; the remaining blocks use the
                block's default stride.

        Returns:
            ``nn.Sequential`` holding the stage's blocks in order.

        Side effects:
            Sets ``self.inplanes`` to ``planes * block.expansion``.
        """
        downsample = None
        # A projection shortcut (1x1 conv + BN) is needed whenever the first block
        # changes the spatial size or the channel count.
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        # Only the first block receives the stride and the projection shortcut.
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Run stem, the three stages, global pooling and the classifier on ``x``.

        Returns the output of ``self.fc``, one row per element of the batch
        dimension of ``x``.
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = self.avgpool(x)
        # Flatten everything except the batch dimension before the linear layer.
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

######################################################
####### Do not modify the code above this line #######
######################################################

class cifar_resnet20(nn.Module):
    """ResNet-20 backbone loaded from a pretrained checkpoint plus a new 10-way head.

    A ``CifarResNet(BasicBlock, [3, 3, 3])`` is built, the checkpoint at ``url``
    (file name ``cifar100_resnet20-...``) is loaded into it, and every child
    except the last one (its original ``fc`` classifier) is kept as
    ``self.backbone``. ``self.fc`` is a freshly initialised ``nn.Linear(64, 10)``.

    Constructing the model downloads the checkpoint on first use (it is cached
    by ``model_zoo`` afterwards).
    """

    def __init__(self):
        super(cifar_resnet20, self).__init__()
        ResNet20 = CifarResNet(BasicBlock, [3, 3, 3])
        url = 'https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar100_resnet20-23dac2f1.pt'

        # map_location='cpu' makes loading independent of the device the
        # checkpoint was saved from, so construction also works on CPU-only
        # hosts; callers move the model with .to(device) afterwards.
        ResNet20.load_state_dict(model_zoo.load_url(url, map_location='cpu'))

        # Drop the last child (the pretrained classifier) and keep the rest,
        # ending with the global average pool, as the feature extractor.
        modules = list(ResNet20.children())[:-1]
        self.backbone = nn.Sequential(*modules)
        self.fc = nn.Linear(64, 10)

    def forward(self, x):
        """Return the 10 class scores for each element of the batch ``x``."""
        out = self.backbone(x)
        # Flatten the pooled features to (batch, features) for the linear head.
        out = out.view(out.shape[0], -1)
        return self.fc(out)



# Script entry point: fine-tune only the new linear head of cifar_resnet20 on
# CIFAR-10 for each weight-decay value in `lambdas`, validate after every
# epoch, and save the model with the lowest validation loss seen so far.
if __name__ == '__main__':
    transform = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize(mean=(0.4914, 0.4822, 0.4465),
                                                         std=(0.2023, 0.1994, 0.2010))])
    trainset = datasets.CIFAR10('./data', train=True, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=32,
                                              shuffle=True, num_workers=2)

    testset = datasets.CIFAR10('./data', train=False, download=True, transform=transform)
    valloader = torch.utils.data.DataLoader(testset, batch_size=32, num_workers=2)

    save_PATH = './model'
    min_loss = sys.float_info.max
    best_lambda = 0
    max_acc = 0
    # lambdas = [1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
    lambdas = [1e-6]

    criterion = nn.CrossEntropyLoss()

    # try different lambdas for L2 norm
    for wd in lambdas:
        print('------------------- lambda = %.8f -------------------' % (wd))

        # Every trial gets its own model so that a later lambda does not start
        # from weights already trained under an earlier one; otherwise the
        # comparison between lambdas is meaningless.
        model = cifar_resnet20().to(device)

        # try different optimizer (only the new head is optimised)
        optimizer = optim.Adam(model.fc.parameters(), lr=0.001, weight_decay=wd)
        # optimizer = optim.SGD(list(model.fc.parameters()), lr=0.001, momentum=0.9, weight_decay=1e-6)

        # try lr decay
        # scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.7)
        # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[6,10], gamma=0.1)

        ## Do the training
        for epoch in range(NUM_EPOCH):  # loop over the dataset multiple times
            # Validation below switches the model to eval mode; switch back so
            # that every epoch (not just the first) trains in training mode.
            model.train()

            running_loss = 0.0
            acc_sum = 0
            total_sum = 0
            for i, (inputs, labels) in enumerate(trainloader):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward + backward + optimize
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                # Bookkeeping in plain Python numbers (.item()) so the counters
                # do not live on the device as tensors.
                predicted = outputs.argmax(dim=1)
                acc_sum += (predicted == labels).sum().item()
                total_sum += labels.size(0)
                running_loss += loss.item()

                if i % 20 == 19:    # print every 20 mini-batches
                    print('[%d, %5d] loss: %.3f  accuracy: %.3f (%d, %d)' %
                          (epoch + 1, i + 1, running_loss / 20, acc_sum / total_sum, acc_sum, total_sum))
                    running_loss = 0.0
                    acc_sum = 0
                    total_sum = 0

            # validation
            model.eval()
            with torch.no_grad():
                num_batches = 0
                val_loss = 0.0
                vali_acc_num = 0
                vali_total_sum = 0
                for x_val, y_val in valloader:
                    x_val = x_val.to(device)
                    y_val = y_val.to(device)

                    # calculate loss and accuracy
                    y_hat = model(x_val)
                    val_loss += criterion(y_hat, y_val).item()
                    vali_acc_num += (y_hat.argmax(dim=1) == y_val).sum().item()
                    vali_total_sum += y_val.size(0)
                    num_batches += 1

                # Mean of the per-batch mean losses.
                avg_loss = val_loss / num_batches
                vali_acc = vali_acc_num / vali_total_sum
                print('[%d] validation loss: %.3f  accuracy: %.2f%%' %
                      (epoch + 1, avg_loss, vali_acc * 100))

                # update and save the best model
                if avg_loss < min_loss:
                    min_loss = avg_loss
                    best_lambda = wd
                    max_acc = vali_acc
                    # NOTE(review): this pickles the whole module object, so
                    # loading it later requires these class definitions to be
                    # importable; saving model.state_dict() is the more
                    # portable option if downstream loaders can be changed.
                    torch.save(model, save_PATH)

            # scheduler.step()

    print('Finished Training')
    print('Min loss: %.3f' % (min_loss))
    print('Max validation accuracy: %.2f%%' % (max_acc * 100))
    print('Best lambda: %f' % (best_lambda))


Files already downloaded and verified
Files already downloaded and verified
------------------- lambda = 0.00000100 -------------------
[1,    20] loss: 2.318  accuracy: 0.184 (118, 640)
[1,    40] loss: 2.138  accuracy: 0.230 (147, 640)
[1,    60] loss: 2.012  accuracy: 0.245 (157, 640)
[1,    80] loss: 1.897  accuracy: 0.312 (200, 640)
[1,   100] loss: 1.757  accuracy: 0.367 (235, 640)
[1,   120] loss: 1.702  accuracy: 0.422 (270, 640)
[1,   140] loss: 1.629  accuracy: 0.427 (273, 640)
[1,   160] loss: 1.521  accuracy: 0.480 (307, 640)
[1,   180] loss: 1.561  accuracy: 0.445 (285, 640)
[1,   200] loss: 1.471  accuracy: 0.497 (318, 640)
[1,   220] loss: 1.405  accuracy: 0.530 (339, 640)
[1,   240] loss: 1.440  accuracy: 0.517 (331, 640)
[1,   260] loss: 1.382  accuracy: 0.534 (342, 640)
[1,   280] loss: 1.356  accuracy: 0.534 (342, 640)
[1,   300] loss: 1.355  accuracy: 0.536 (343, 640)
[1,   320] loss: 1.304  accuracy: 0.562 (360, 640)
[1,   340] loss: 1.328  accuracy: 0.552 (353, 64