<a href="https://colab.research.google.com/github/haruka-inb/pytorch_practice/blob/main/deep_residual_network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep Residual Network

Implementation of Section 4.2 ("CIFAR-10 and Analysis") of the ResNet paper: https://arxiv.org/pdf/1512.03385.pdf

Explanation: https://towardsdatascience.com/resnets-for-cifar-10-e63e900524e0

In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [None]:
# run on the GPU when one is visible, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# hyper-parameters read by the training cell
num_epochs = 80
batch_size = 100
learning_rate = 0.001

# train-time preprocessing, applied in order: pad by 4, random horizontal
# flip, random 32x32 crop, convert to tensor
augmentation_steps = [
    transforms.Pad(4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor(),
]
transform = transforms.Compose(augmentation_steps)

# CIFAR-10 splits: only the train split asks for a download; the test split
# is read from the same root and is only converted to tensors (no augmentation)
# NOTE(review): the root begins with '/', so it is an absolute path - confirm
# that this is intended rather than a relative '../../data'
data_root = '/../../data'
trainset = torchvision.datasets.CIFAR10(
    root=data_root, train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(
    root=data_root, train=False, transform=transforms.ToTensor())

# mini-batch iterators; only the training data is shuffled
loader_options = {'batch_size': batch_size, 'num_workers': 2}
train_loader = torch.utils.data.DataLoader(trainset, shuffle=True,
                                           **loader_options)
test_loader = torch.utils.data.DataLoader(testset, shuffle=False,
                                          **loader_options)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /../../data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 43529617.01it/s]


Extracting /../../data/cifar-10-python.tar.gz to /../../data


In [None]:
# factory for the 3x3 convolution used throughout the network
def conv3x3(in_channels, out_channels, stride=1):
  """Return a bias-free 3x3 ``nn.Conv2d`` with padding 1.

  Args:
    in_channels: number of input channels.
    out_channels: number of output channels.
    stride: convolution stride (default 1).

  Returns:
    A freshly constructed ``nn.Conv2d`` layer.
  """
  fixed_options = dict(kernel_size=3, padding=1, bias=False)
  return nn.Conv2d(in_channels, out_channels, stride=stride, **fixed_options)

# class for Residual Block
class ResidualBlock(nn.Module):
  """Residual block: two 3x3 conv + batch-norm stages plus a shortcut.

  The main path is conv1 -> bn1 -> ReLU -> conv2 -> bn2. The shortcut is the
  input itself, or ``downsample(input)`` when a ``downsample`` module is
  given. Both paths are summed and passed through a final ReLU.

  Args:
    in_channels: channels of the input feature map.
    out_channels: channels produced by both convolutions.
    stride: stride of the first convolution (the second always uses 1).
    downsample: optional module applied to the input so that the shortcut
      matches the main path; ``None`` means an identity shortcut.
  """

  # define the network
  def __init__(self, in_channels, out_channels, stride=1, downsample=None):
    super(ResidualBlock, self).__init__()
    self.conv1 = conv3x3(in_channels, out_channels, stride)
    self.bn1 = nn.BatchNorm2d(out_channels)
    self.relu = nn.ReLU(inplace=True)
    self.conv2 = conv3x3(out_channels, out_channels)
    self.bn2 = nn.BatchNorm2d(out_channels)
    self.downsample = downsample

  # method to feed data into the network
  def forward(self, x):
    """Run the block on ``x`` and return the activated sum of both paths."""
    residual = x
    out = self.conv1(x)
    out = self.bn1(out)
    out = self.relu(out)
    out = self.conv2(out)
    out = self.bn2(out)
    # Compare against None instead of relying on truthiness: containers such
    # as nn.Sequential define __len__, so a zero-length one would be falsy
    # and silently skipped.
    if self.downsample is not None:
      residual = self.downsample(x)
    out += residual
    out = self.relu(out)

    return out

# class for ResNet
class ResNet(nn.Module):
  """Three-stage residual network for 3-channel images.

  Structure: a 3x3 stem convolution (3 -> 16 channels) with batch-norm and
  ReLU, three stages of residual blocks with 16, 32 and 64 channels (the
  second and third stage are built with stride 2), an 8x8 average pool and a
  linear classifier over 64 features.

  The fixed ``AvgPool2d(8)`` followed by ``Linear(64, ...)`` means the feature
  map entering the pool has to be 8x8. With blocks that apply the stride in
  their first convolution (as ``ResidualBlock`` does) this corresponds to
  32x32 inputs.

  Args:
    block: block class, called as
      ``block(in_channels, out_channels, stride, downsample)``.
    layers: sequence of three ints; how many blocks each stage contains.
    num_classes: size of the classifier output (default 10).
  """

  # define the network
  # layers means how many blocks are packed in a layer
  def __init__(self, block, layers, num_classes=10):
    super(ResNet, self).__init__()
    # running channel count; make_layer reads and updates it
    self.in_channels = 16
    self.conv = conv3x3(3, 16)
    self.bn = nn.BatchNorm2d(16)
    self.relu = nn.ReLU(inplace=True)
    self.layer1 = self.make_layer(block, 16, layers[0])
    self.layer2 = self.make_layer(block, 32, layers[1], 2)
    self.layer3 = self.make_layer(block, 64, layers[2], 2)
    self.avg_pool = nn.AvgPool2d(8)
    self.fc = nn.Linear(64,num_classes)

  # method to make a layer
  def make_layer(self, block, out_channels, blocks, stride=1):
    """Build one stage of ``blocks`` residual blocks.

    Only the first block receives ``stride`` and, when the stride is not 1 or
    the channel count changes, a projection shortcut. All remaining blocks
    use stride 1 and an identity shortcut. ``self.in_channels`` is set to
    ``out_channels`` as a side effect.

    Args:
      block: block class to instantiate.
      out_channels: channel count produced by every block of the stage.
      blocks: number of blocks in the stage.
      stride: stride of the first block (default 1).

    Returns:
      An ``nn.Sequential`` holding the blocks in order.
    """
    downsample = None
    # The shortcut must be reshaped whenever the first block changes the
    # spatial size (stride != 1) or the number of channels.
    if (stride != 1) or (self.in_channels != out_channels):
      # Use the stage's own stride so the shortcut always matches the main
      # path; a hard-coded stride of 2 breaks the channel-only case
      # (stride == 1), where the main path keeps the spatial size.
      # NOTE(review): the shortcut ends in a ReLU; the projection shortcut
      # in the ResNet paper is purely linear - confirm this is intended.
      downsample = nn.Sequential(
                    conv3x3(self.in_channels, out_channels, stride=stride),
                    nn.BatchNorm2d(out_channels),
                    nn.ReLU(inplace=True)
                    )

    layers = []
    # first block of the stage: carries the stride and the shortcut projection
    layers.append(block(self.in_channels, out_channels, stride, downsample))

    # remaining blocks: stride 1, identity shortcut
    self.in_channels = out_channels
    for i in range(1, blocks):
      layers.append(block(out_channels, out_channels))

    return nn.Sequential(*layers)


  # method to feed data into the network
  def forward(self, x):
    """Return the class scores, one row of ``num_classes`` values per sample."""
    out = self.conv(x)
    out = self.bn(out)
    out = self.relu(out)
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = self.avg_pool(out)
    # flatten everything except the batch dimension for the classifier
    out = out.view(out.size(0), -1)
    out = self.fc(out)

    return out

# build the network with two residual blocks in each of the three stages
# (2 convolutions per block -> 12, plus the stem convolution and the final
# linear layer), then move its parameters to the selected device
model = ResNet(block=ResidualBlock, layers=[2, 2, 2])
model = model.to(device)

# model

In [None]:
# cross-entropy loss on the class scores, optimised with Adam starting from
# the configured learning rate
criteria = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# helper for learning-rate decay: overwrite the optimizer's learning rate
def update_lr(optimizer, lr):
  """Set ``lr`` as the learning rate of every parameter group.

  Args:
    optimizer: object exposing ``param_groups``, an iterable of dicts.
    lr: the new learning rate written into each group's ``'lr'`` entry.

  Returns:
    None; the groups are modified in place.
  """
  for group in optimizer.param_groups:
    group.update(lr=lr)

# train the model
curr_lr = learning_rate

# make sure batch-norm layers are in training mode, even if this cell is
# re-run after the model has been switched to eval mode
model.train()

for e in range(num_epochs):
  for i, (images, labels) in enumerate(train_loader):
    images, labels = images.to(device), labels.to(device)

    # The optimisation step has to run once per mini-batch, so it lives
    # inside the batch loop. At epoch level it would only ever see the last
    # batch of each epoch.

    # forward pass
    outputs = model(images)
    loss = criteria(outputs, labels)

    # backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # report the current mini-batch loss every 100 steps
    if (i+1) % 100 == 0:
      print("Epoch {}/{}, Step {}/{}, training loss is {}"
      .format(e+1, num_epochs, i+1, len(train_loader), loss.item()))

  # decay learning rate: divide by 3 after every 20th epoch
  if (e+1) % 20 == 0:
    curr_lr /= 3
    update_lr(optimizer, curr_lr)

# evaluate on the test split
# eval mode: batch-norm layers use their running statistics
model.eval()
correct, total = 0, 0
# no gradients are needed for inference
with torch.no_grad():
  for images, labels in test_loader:
    images = images.to(device)
    labels = labels.to(device)

    outputs = model(images)
    # index of the largest score per row is the predicted class
    pred = torch.max(outputs, dim=1)[1]

    total += labels.size(0)
    correct += int((pred == labels).sum())

print("Accuracy is {} %".format(100*correct/total))

# persist the trained weights (state dict only) to the working directory
torch.save(model.state_dict(), "params.ckpt")

Epoch 1/400, Step 500/500, training loss is 2.6262741088867188
Epoch 2/400, Step 500/500, training loss is 4.982049465179443
Epoch 3/400, Step 500/500, training loss is 4.0545525550842285
Epoch 4/400, Step 500/500, training loss is 3.0404510498046875
Epoch 5/400, Step 500/500, training loss is 2.6977615356445312
Epoch 6/400, Step 500/500, training loss is 2.545424699783325
Epoch 7/400, Step 500/500, training loss is 2.687650680541992
Epoch 8/400, Step 500/500, training loss is 2.3720755577087402
Epoch 9/400, Step 500/500, training loss is 2.3953146934509277
Epoch 10/400, Step 500/500, training loss is 2.3366892337799072
Epoch 11/400, Step 500/500, training loss is 2.3307995796203613
Epoch 12/400, Step 500/500, training loss is 2.315534830093384
Epoch 13/400, Step 500/500, training loss is 2.3607523441314697
Epoch 14/400, Step 500/500, training loss is 2.3104403018951416
Epoch 15/400, Step 500/500, training loss is 2.339702844619751
Epoch 16/400, Step 500/500, training loss is 2.2991890