# Lab 10.6.2: Advanced CNN (Resnet with CIFAR10)

**Jonathan Choi 2021**

**[Deep Learning By Torch] End to End study scripts of Deep Learning by implementing code practice with Pytorch.**

If you have an any issue, please PR below.

[[Deep Learning By Torch] - Github @JonyChoi](https://github.com/jonychoi/Deep-Learning-By-Torch)

## imports

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms

import visdom

In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

torch.manual_seed(1)

if device == 'cuda':
    torch.cuda.manual_seed_all(1)

In [4]:
vis = visdom.Visdom()
vis.close(env = "main")

Setting up a new session...


''

## Define Value Tracker

In [5]:
def value_tracker(value_plot, value, index):
    '''num, loss_value are Tensor'''
    vis.line(X = index, Y = value, win = value_plot, update = 'append')

## Transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

### How to Calculate mean and std in Normalize

In [6]:
transform = transforms.Compose([
    transforms.ToTensor()
])

trainset = datasets.CIFAR10(root = 'cifar10/', download = True, transform = transform, train = True)

print(trainset.data.shape)

train_data_mean = trainset.data.mean(axis = (0, 1, 2))
train_data_std = trainset.data.std(axis = (0, 1, 2))

print(train_data_mean)
print(train_data_std)

train_data_mean = train_data_mean / 255
train_data_std = train_data_std / 255

print(train_data_mean)
print(train_data_std)

Files already downloaded and verified
(50000, 32, 32, 3)
[125.30691805 122.95039414 113.86538318]
[62.99321928 62.08870764 66.70489964]
[0.49139968 0.48215841 0.44653091]
[0.24703223 0.24348513 0.26158784]


In [7]:
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(train_data_mean, train_data_std)
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(train_data_mean, train_data_std)
])

trainset = datasets.CIFAR10(download = True, root = 'cifar10/', transform = transform_train, train = True)
testset = datasets.CIFAR10(download = True, root = 'cifar10/', transform = transform_test, train = False)

train_loader = torch.utils.data.DataLoader(dataset = trainset, batch_size = 256, shuffle = True, num_workers = 0)
test_loader = torch.utils.data.DataLoader(dataset = testset, batch_size = 256, shuffle = True, num_workers = 0)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horese', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


## Reform the ResNet since the dataset resolution is too small

In [8]:
import torchvision.models.resnet as resnet
from torch import Tensor
from typing import Type, Any, Callable, Union, List, Optional
BasicBlock = resnet.BasicBlock
Bottleneck = resnet.Bottleneck
conv1x1 = resnet.conv1x1

In [9]:
class ResNet(nn.Module):
    
    def __init__(
        self,
        block: Type[Union[BasicBlock, Bottleneck]],
        layers: List[int],
        num_classes: int = 1000,
        zero_init_residual: bool = False,
        groups: int = 1,
        width_per_group: int = 64,
        replace_stride_with_dilation: Optional[List[bool]] = None,
        norm_layer: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])








        #change strides from 2 to 1 below -----------------------------





        self.layer2 = self._make_layer(block, 128, layers[1], stride=1,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=1,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=1,
                                       dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))





        
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)  # type: ignore[arg-type]
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)  # type: ignore[arg-type]

    def _make_layer(self, block: Type[Union[BasicBlock, Bottleneck]], planes: int, blocks: int,
                    stride: int = 1, dilate: bool = False) -> nn.Sequential:
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def _forward_impl(self, x: Tensor) -> Tensor:
        # See note [TorchScript super()]
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

    def forward(self, x: Tensor) -> Tensor:
        return self._forward_impl(x)

In [10]:
resnet50 = ResNet(resnet.Bottleneck, [3, 4, 6, 3], 10, True).to(device)

### Take a Moment!!!!!!!!!!!!!!!!!

if not normalize the Tensor just like ```torch.Tensor(1, 3, 32, 32).to(device)```

it returns nan to all of classes

**It is important to normalize our data**

In [11]:
test = torch.rand(1, 3, 32, 32).to(device)
out = resnet50(test)
print(out)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


tensor([[ 0.2347, -0.2441,  0.2715,  0.1799,  0.0963, -0.1991,  0.0620, -0.3722,
          0.0864,  0.4435]], device='cuda:0', grad_fn=<AddmmBackward>)


In [12]:
optimizer = optim.SGD(resnet50.parameters(), lr = 0.1, momentum =0.9, weight_decay = 5e-4)
lr_sche = optim.lr_scheduler.StepLR(optimizer, step_size =10, gamma = 0.5)

## Make Plot

In [13]:
loss_plt = vis.line(Y=torch.Tensor(1).zero_(), opts = dict(title = "loss_tracker", legend = ['loss'], showlegend = True))
acc_plt = vis.line(Y = torch.Tensor(1).zero_(), opts = dict(title = 'Accuracy', legend = ['Acc'], showlegend = True))

## Define Acc_check Function

In [14]:
def acc_check(net, test_loader, epoch, save = 1):
    correct = 0
    total = 0
    with torch.no_grad():
        for X, Y in test_loader:
            X = X.to(device)
            Y = Y.to(device)

            #prediction
            pred = net(X)

            _, predicted = torch.max(pred, 1)

            total += Y.size(0)
            correct += (predicted == Y).sum().item()

    acc = (100 * correct / total)
    print('Accuracy of the network on the 10000 test images: {}'.format(acc))

    if epoch % 10 == 0:
        if save:
            torch.save(net.state_dict(), "./models/resnet_cifar10/model_epoch_{}_acc_{}.pth".format(epoch, acc))

    return acc

## Training with (acc check + model save)

In [15]:
print(len(train_loader))
epochs = 20

for epoch in range(epochs):

    running_loss = 0.0
    
    for i, data in enumerate(train_loader):
        #get the inputs
        X, Y = data
        X = X.to(device)
        Y = Y.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        pred = resnet50(X)
        
        #cost
        cost = F.cross_entropy(pred, Y).to(device)
        
        #reduce the cost
        cost.backward()
        optimizer.step()
        lr_sche.step()

        #print statistics
        running_loss += cost.item()
        if i % 30 == 29:
            print('Epoch: {} / {}, MiniBatch: {} / {}, Cost: {}'.format(epoch + 1, 20, i, len(train_loader), cost.item()))
            value_tracker(loss_plt, torch.Tensor([running_loss / 30]), torch.Tensor([i + epoch * len(train_loader)]))
        running_loss = 0

    #Check accuracy
    acc = acc_check(resnet50, test_loader, epoch, save = 1)
    value_tracker(acc_plt, torch.Tensor([acc]), torch.Tensor([epoch]))


print('Finshed Learning')
            

196
Epoch: 1 / 20, MiniBatch: 29 / 196, Cost: 1.987879753112793
Epoch: 1 / 20, MiniBatch: 59 / 196, Cost: 1.6570311784744263
Epoch: 1 / 20, MiniBatch: 89 / 196, Cost: 1.669613003730774
Epoch: 1 / 20, MiniBatch: 119 / 196, Cost: 1.70968759059906
Epoch: 1 / 20, MiniBatch: 149 / 196, Cost: 1.6382006406784058
Epoch: 1 / 20, MiniBatch: 179 / 196, Cost: 1.7769445180892944
Accuracy of the network on the 10000 test images: 38.7
Epoch: 2 / 20, MiniBatch: 29 / 196, Cost: 1.604959487915039
Epoch: 2 / 20, MiniBatch: 59 / 196, Cost: 1.6588995456695557
Epoch: 2 / 20, MiniBatch: 89 / 196, Cost: 1.7000317573547363
Epoch: 2 / 20, MiniBatch: 119 / 196, Cost: 1.690345048904419
Epoch: 2 / 20, MiniBatch: 149 / 196, Cost: 1.5970897674560547
Epoch: 2 / 20, MiniBatch: 179 / 196, Cost: 1.5948519706726074
Accuracy of the network on the 10000 test images: 38.92
Epoch: 3 / 20, MiniBatch: 29 / 196, Cost: 1.669651985168457
Epoch: 3 / 20, MiniBatch: 59 / 196, Cost: 1.578109860420227
Epoch: 3 / 20, MiniBatch: 89 / 19

## Model Accuracy Testing

In [19]:
correct = 0
total = 0

with torch.no_grad():
    for data in test_loader:
        X, Y = data
        X = X.to(device)
        Y = Y.to(device)

        #prediction
        pred = resnet50(X)

        _, predicted = torch.max(pred, 1)

        total += Y.size(0)

        correct += (predicted == Y).sum().item()

print('Accuracy of the network on the 10000 test images: {}'.format(100 * correct / total))

Accuracy of the network on the 10000 test images: 39.12
