# [ResNet(2015)](https://deep-learning-study.tistory.com/534)
- 전체코드 : https://github.com/Seonghoon-Yu/AI_Paper_Review
- 동빈나: https://github.com/ndb796/Deep-Learning-Paper-Review-and-Practice

### 데이터셋 불러오기
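데이터셋은 torchvision 패키지에서 제공하는 STL10 dataset을 이용하겠습니다. STL10 dataset은 10개의 label을 가지며, train dataset 5000개와 test dataset 8000개로 구성됩니다. (단, 아래 코드에서는 STL10을 불러오는 부분이 주석 처리되어 있으며, 실제로는 `data/hymenoptera_data` 폴더의 이미지를 `ImageFolder`로 불러옵니다.)  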

In [1]:
# import package
# model
import torch
import torch.nn as nn 
import torch.nn.functional as F 
from torchsummary import summary 
from torch import optim 
from torch.optim.lr_scheduler import StepLR
import torchvision

# dataset and transformation
from torchvision import datasets 
import torchvision.transforms as transforms 
from torch.utils.data import DataLoader 

# display images
from torchvision import utils
import matplotlib.pyplot as plt
plt.ion()   # 대화형 모드


# utils
import numpy as np 
import os, time, copy

# Print the installed PyTorch / torchvision versions for reproducibility.
print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)

PyTorch Version:  1.10.2
Torchvision Version:  0.11.3


In [None]:
# Specify the directory the dataset is downloaded to / loaded from.

# specify the data path
path_default = "./data"

# Create the directory if it does not exist yet.
# os.makedirs(..., exist_ok=True) replaces the exists()-then-mkdir() pair:
# it has no check-then-create race and also creates missing parent
# directories, so path_default may safely be changed to a nested path.
os.makedirs(path_default, exist_ok=True)

# load dataset
# train_ds = datasets.STL10(path_default, split='train', download=True, transform=transforms.ToTensor())
# val_ds = datasets.STL10(path_default, split='test', download=True, transform=transforms.ToTensor())

# print(len(train_ds))
# print(len(val_ds))

In [None]:
# Channel mean / std used by both pipelines below (the values commonly used
# for ImageNet-pretrained models).
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Training pipeline: random crop + horizontal flip as augmentation.
# Validation pipeline: deterministic resize + center crop.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(imagenet_mean, imagenet_std),
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(imagenet_mean, imagenet_std),
    ]),
}

# Root folder that contains the 'train' and 'val' sub-directories.
data_dir = 'data/hymenoptera_data'

train_root = os.path.join(data_dir, 'train')
val_root = os.path.join(data_dir, 'val')

train_ds = datasets.ImageFolder(train_root, data_transforms['train'])
val_ds = datasets.ImageFolder(val_root, data_transforms['val'])

# Report the number of samples in each split.
print(len(train_ds))
print(len(val_ds))


In [None]:
# To normalize the dataset, calculate the per-channel mean and std of the
# image pixel values.
def _channel_stats(dataset):
    """Return per-image channel means and stds measured on raw pixels.

    The dataset's current transform is temporarily replaced by a plain
    ``ToTensor()`` so that the statistics describe the raw [0, 1] pixel
    values. Measuring them through a pipeline that already normalizes or
    randomly crops the images would yield values that are useless as
    arguments to ``transforms.Normalize``. The original transform is
    restored afterwards, even if loading a sample fails.

    Each sample is loaded exactly once, so the mean and the std of an image
    are always computed from the same tensor.

    Args:
        dataset: dataset yielding ``(image, label)`` pairs and exposing a
            writable ``transform`` attribute (e.g. ``ImageFolder``).

    Returns:
        Tuple ``(means, stds)`` of lists holding one length-C array per image.
    """
    saved_transform = dataset.transform
    dataset.transform = transforms.ToTensor()
    means, stds = [], []
    try:
        for img, _ in dataset:
            pixels = img.numpy()
            # Reduce over height and width, keeping the channel axis.
            means.append(np.mean(pixels, axis=(1, 2)))
            stds.append(np.std(pixels, axis=(1, 2)))
    finally:
        dataset.transform = saved_transform
    return means, stds


traind_meansRGB, train_stdRGB = _channel_stats(train_ds)

# Average the per-image statistics over the whole split, channel by channel.
train_meanR, train_meanG, train_meanB = np.mean(traind_meansRGB, axis=0)
train_stdR, train_stdG, train_stdB = np.mean(train_stdRGB, axis=0)

val_meanRGB, val_stdRGB = _channel_stats(val_ds)

val_meanR, val_meanG, val_meanB = np.mean(val_meanRGB, axis=0)
val_stdR, val_stdG, val_stdB = np.mean(val_stdRGB, axis=0)

print(train_meanR, train_meanG, train_meanB)
print(val_meanR, val_meanG, val_meanB)

In [None]:
# Define the image transformations applied to the datasets.
#
# Resize receives an explicit (height, width) pair. With a single int only
# the shorter edge is matched to 224 and the aspect ratio is kept, so
# non-square images would come out with different sizes and could not be
# stacked into a batch (or into an image grid). For square inputs the result
# is identical to Resize(224).
#
# Both pipelines normalize with the statistics of the training split.
train_transformation = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize(
        [train_meanR, train_meanG, train_meanB], [train_stdR, train_stdG, train_stdB]
    ),
    # Augmentation: applied to the training split only.
    transforms.RandomHorizontalFlip()
])

val_transformation = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize(
        [train_meanR, train_meanG, train_meanB], [train_stdR, train_stdG, train_stdB]
    )
])

In [None]:
# Attach the transformations to the datasets, then wrap them in DataLoaders.
# apply transformation
train_ds.transform, val_ds.transform = train_transformation, val_transformation

# create DataLoader
# NOTE(review): the validation loader is shuffled as well; shuffling is
# usually only needed for training -- confirm this is intended.
train_dl = DataLoader(dataset=train_ds, batch_size=32, shuffle=True)
val_dl = DataLoader(dataset=val_ds, batch_size=32, shuffle=True)

In [None]:
# transformation이 적용된 샘플 이미지를 확인하겠습니다.
# display sample images
def show(img, y=None, color=True):
    """Draw an image tensor with matplotlib, optionally titled with labels.

    Args:
        img: tensor whose axes are reordered from (0, 1, 2) to (1, 2, 0)
            before plotting, i.e. channels-first to channels-last.
        y: optional label(s); when given, shown in the plot title.
        color: not used by this function; kept for interface compatibility.
    """
    channels_last = img.numpy().transpose(1, 2, 0)
    plt.imshow(channels_last)

    if y is None:
        return
    plt.title(f"labels: {str(y)}")

# Fix the seeds so the same sample images are drawn on every run.
np.random.seed(1)
torch.manual_seed(1)

grid_size = 4
rnd_inds = np.random.randint(0, len(train_ds), grid_size)
print(f"image indices: {rnd_inds}")

# Fetch every sample exactly once. Indexing the dataset separately for the
# image and for the label would load and transform each image twice.
samples = [train_ds[i] for i in rnd_inds]
x_grid = [img for img, _ in samples]
y_grid = [label for _, label in samples]

# Tile the images into a single row and display them with their labels.
x_grid = utils.make_grid(x_grid, nrow=grid_size, padding=2)

show(x_grid, y_grid)

### [모델 구축하기](https://github.com/weiaicunzai/pytorch-cifar100/blob/master/models/resnet.py)
ResNet은 residual block이 겹겹이 쌓여 구성된 모델입니다.

각각의 residual block을 정의합니다.

In [2]:
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions plus a shortcut branch.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: base number of output channels; the block emits
            ``out_channels * expansion`` channels.
        stride: stride of the first 3x3 convolution (and of the projection
            shortcut, when one is needed).
    """

    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        expanded_channels = out_channels * BasicBlock.expansion

        # The convolutions carry no bias because each one is followed by a
        # BatchNorm layer, which has its own bias term.
        main_path = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, expanded_channels, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(expanded_channels),
        ]
        self.residual_function = nn.Sequential(*main_path)

        if stride == 1 and in_channels == expanded_channels:
            # Identity mapping: input and output already have the same
            # feature-map size and channel count.
            self.shortcut = nn.Sequential()
        else:
            # Projection mapping with a 1x1 convolution so the shortcut
            # matches the residual branch in size and channels.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, expanded_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(expanded_channels),
            )

        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ReLU(residual(x) + shortcut(x))."""
        summed = self.residual_function(x) + self.shortcut(x)
        return self.relu(summed)


In [3]:
class BottleNeck(nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions plus shortcut.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: width of the two inner convolutions; the block emits
            ``out_channels * expansion`` channels.
        stride: stride of the 3x3 convolution (and of the projection
            shortcut, when one is needed).
    """

    expansion = 4

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        expanded_channels = out_channels * BottleNeck.expansion

        # Every convolution is bias-free and followed by BatchNorm.
        main_path = [
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, expanded_channels, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(expanded_channels),
        ]
        self.residual_function = nn.Sequential(*main_path)

        if stride == 1 and in_channels == expanded_channels:
            # Shapes already agree: use the identity shortcut.
            self.shortcut = nn.Sequential()
        else:
            # Project the input with a 1x1 convolution so it can be added
            # to the residual branch.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, expanded_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(expanded_channels),
            )

        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ReLU(residual(x) + shortcut(x))."""
        summed = self.residual_function(x) + self.shortcut(x)
        return self.relu(summed)

In [4]:
[2] + [1] * (3 - 1)

[2, 1, 1]

In [4]:
class ResNet(nn.Module):
    """ResNet backbone: a convolutional stem, four residual stages, a classifier.

    Args:
        block: residual block type. It is called as
            ``block(in_channels, out_channels, stride)`` and must expose an
            ``expansion`` attribute.
        num_block: sequence with the number of blocks in each of the four
            stages.
        num_classes: size of the final linear layer's output.
        init_weights: when true, run ``_initialize_weights`` after building.
    """

    def __init__(self, block, num_block, num_classes=10, init_weights=True):
        super().__init__()

        # Channel count fed into the next block; updated by _make_layer.
        self.in_channels = 64

        # Stem: 7x7 stride-2 convolution followed by a 3x3 stride-2 max-pool.
        stem = [
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        ]
        self.conv1 = nn.Sequential(*stem)

        # Four residual stages; every stage except the first starts with a
        # stride-2 block.
        self.conv2_x = self._make_layer(block, 64, num_block[0], 1)
        self.conv3_x = self._make_layer(block, 128, num_block[1], 2)
        self.conv4_x = self._make_layer(block, 256, num_block[2], 2)
        self.conv5_x = self._make_layer(block, 512, num_block[3], 2)

        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # weight initialization
        if init_weights:
            self._initialize_weights()

    def _make_layer(self, block, out_channels, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the rest stride 1.

        ``self.in_channels`` is advanced after every block so that the next
        block (and the next stage) receives the right input width.
        """
        stage = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            stage.append(block(self.in_channels, out_channels, block_stride))
            self.in_channels = out_channels * block.expansion

        return nn.Sequential(*stage)

    def forward(self, x):
        """Run stem, residual stages, pooling and classifier; return the logits."""
        features = self.conv1(x)
        for stage in (self.conv2_x, self.conv3_x, self.conv4_x, self.conv5_x):
            features = stage(features)
        pooled = self.avg_pool(features)
        # Flatten everything but the batch dimension for the linear layer.
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)

    # define weight initialization function
    def _initialize_weights(self):
        """Initialize conv (Kaiming normal), BatchNorm (1/0) and linear (N(0, 0.01)) layers."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.zeros_(module.bias)

In [5]:
def resnet18(num_classes=10, init_weights=True):
    """Build a ResNet with BasicBlock and [2, 2, 2, 2] blocks per stage.

    Args:
        num_classes: output size of the classifier; defaults to 10, the
            value previously implied by the ResNet default.
        init_weights: forwarded to ResNet; controls custom initialization.
    """
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes, init_weights=init_weights)

def resnet34(num_classes=10, init_weights=True):
    """Build a ResNet with BasicBlock and [3, 4, 6, 3] blocks per stage.

    Args:
        num_classes: output size of the classifier; defaults to 10, the
            value previously implied by the ResNet default.
        init_weights: forwarded to ResNet; controls custom initialization.
    """
    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, init_weights=init_weights)

def resnet50(num_classes=10, init_weights=True):
    """Build a ResNet with BottleNeck and [3, 4, 6, 3] blocks per stage.

    Args:
        num_classes: output size of the classifier; defaults to 10, the
            value previously implied by the ResNet default.
        init_weights: forwarded to ResNet; controls custom initialization.
    """
    return ResNet(BottleNeck, [3, 4, 6, 3], num_classes=num_classes, init_weights=init_weights)

def resnet101(num_classes=10, init_weights=True):
    """Build a ResNet with BottleNeck and [3, 4, 23, 3] blocks per stage.

    Args:
        num_classes: output size of the classifier; defaults to 10, the
            value previously implied by the ResNet default.
        init_weights: forwarded to ResNet; controls custom initialization.
    """
    return ResNet(BottleNeck, [3, 4, 23, 3], num_classes=num_classes, init_weights=init_weights)

def resnet152(num_classes=10, init_weights=True):
    """Build a ResNet with BottleNeck and [3, 8, 36, 3] blocks per stage.

    Args:
        num_classes: output size of the classifier; defaults to 10, the
            value previously implied by the ResNet default.
        init_weights: forwarded to ResNet; controls custom initialization.
    """
    return ResNet(BottleNeck, [3, 8, 36, 3], num_classes=num_classes, init_weights=init_weights)

In [7]:
# Sanity-check the model: push a random batch through it and print the
# size of the output.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = resnet50()
model = model.to(device)

# Random batch of three 3x224x224 inputs.
x = torch.randn(3, 3, 224, 224)
x = x.to(device)

output = model(x)
print(output.size())

torch.Size([3, 10])


In [8]:
# Print a per-layer table (layer type, output shape, parameter count) for a
# 3x224x224 input, running on the same device type as the model.
summary(model, (3, 224, 224), device=device.type)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,384
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13          [-1, 256, 56, 56]          16,384
      BatchNorm2d-14          [-1, 256,