# CIFAR10 데이터를 활용한 ResNet 구현 및 학습

In [1]:
#google drive mount
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
#libraray
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from torchsummary import summary

In [3]:
#가중치 초기화
def init_weights(m):
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode= 'fan_out', nonlinearity='relu')
        if m.bias is not None:
                nn.init.constant_(m.bias,0)
    elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, 0, 0.01)
        nn.init.constant_(m.bias,0)

In [4]:
#data transformer
transformer = transforms.Compose([
    transforms.RandomCrop(32, padding=4), # 입력 사이즈가 작아 crop 시 padding을 줌.
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor()
])

In [5]:
if not os.path.exists('/content/data'):
    os.mkdir('/content/data')

# data load
train_data = datasets.CIFAR10(root='./data', train=True, download=True, transform=transformer)
test_data = datasets.CIFAR10(root='./data', train=False, download=True, transform=transformer)

# DataLoader
train_loader = DataLoader(train_data, batch_size=256, shuffle=True)
test_loader = DataLoader(test_data, batch_size=256, shuffle=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [21]:
#hyperparameter
num_epochs = 50
batch_size = 256
learning_rate = 0.1

class_list = train_data.classes
num_claases = len(class_list)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

print(class_list)
print(device)

['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
cuda


In [None]:
#Residual Block 구조 정의
class BasicBlock(nn.Module): #ResNet18, ResNet34
    mul = 1 #기본 구조에서는 out_chs의 수가 바뀌지 않음으로 1로 설정
    def __init__(self, in_chs:int, out_chs:int, stride=1):
        super().__init__()

        # 입력과 출력의 높이와 너비가 동일하고 identity mapping과의 연산을 위해 채널 수도 동일하게 조정 
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=in_chs, out_channels=out_chs, kernel_size= 3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_chs),
            nn.ReLU()
        )

        self.conv2 = nn.Sequential(
            nn.Conv2d(out_chs, out_chs, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_chs)
        )

        #identity map
        self.shortcut = nn.Sequential()

        # F(x)와 x의 size가 안 맞을 경우, x의 모양을 맞춰 줌
        if stride != 1:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_chs, out_chs, kernel_size=1, stride = stride, bias=False),
                nn.BatchNorm2d(out_chs)
            )

    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2(out)
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class BottleNeck(nn.Module):#ResNet layer 50 이상
    mul = 4 
    def __init__(self, in_chs:int, out_chs:int, stride=1):
        super().__init__()

        self.conv1 = nn.Sequential(
            nn.Conv2d(in_chs, out_chs, kernel_size=1, stride=stride, bias=False), #BatchNorm에 편향이 있음으로 bias= False
            nn.BatchNorm2d(out_chs),
            nn.ReLU()
        )
        
        self.conv2 = nn.Sequential(
            nn.Conv2d(out_chs, out_chs, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_chs),
            nn.ReLU()
        )

        self.conv3 = nn.Sequential(
            nn.Conv2d(out_chs, out_chs*BottleNeck.mul, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_chs*BottleNeck.mul)
        )

        self.shortcut = nn.Sequential()

        if stride != 1 or out_chs*BottleNeck.mul != in_chs:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_chs, out_chs*BottleNeck.mul, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_chs*BottleNeck.mul)
            )


    def forward(self,x):
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.conv3(out)
        out += self.shortcut(x)
        out = F.relu(out)
        return out

In [None]:
## 모델 생성
class ResNet(nn.Module):
    def __init__(self, block: BasicBlock or BottleNeck , num_blocks:list, num_classes=10):
        super().__init__()
        
        # 첫 conv layer num_out_chs 설정  
        self.init_out_chs = 64

        # 일반적인 conv layer
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, self.init_out_chs, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(self.init_out_chs),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )

        # residual conv block
        self.conv2_x = self.make_layer(block, 64, num_blocks[0], stride=1)
        self.conv3_x = self.make_layer(block, 128, num_blocks[1], stride=2)
        self.conv4_x = self.make_layer(block, 256, num_blocks[2], stride=2)
        self.conv5_x = self.make_layer(block, 512, num_blocks[3], stride=2)

        # avg, fc layers
        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.linear = nn.Linear(512*block.mul, num_classes)

    def make_layer(self, block, out_chs, blocks:int, stride):
        # 첫 layer만 크기를 줄이고, 그 다음은 모양을 유지
        strides = [stride] + [1]*(blocks-1)
        layers = []
        for i in range(blocks):
            layers.append(block(self.init_out_chs, out_chs, strides[i]))
            self.init_out_chs = block.mul * out_chs
        return nn.Sequential(*layers)

    def forward(self,x):
        out = self.conv1(x)
        out = self.conv2_x(out)
        out = self.conv3_x(out)
        out = self.conv4_x(out)
        out = self.conv5_x(out)
        out = self.avgpool(out)
        out = torch.flatten(out, 1) #(m,512) or (m,2048)
        out = self.linear(out)
        return out

In [None]:
def ResNet18():
    return ResNet(BasicBlock, [2, 2, 2, 2])

def ResNet34():
    return ResNet(BasicBlock, [3, 4, 6, 3])

def ResNet50():
    return ResNet(BottleNeck, [3, 4, 6, 3])

def ResNet101():
    return ResNet(BottleNeck, [3, 4, 23, 3])

def ResNet152():
    return ResNet(BottleNeck, [3, 8, 36, 3])


In [12]:
# ResNet50으로 학습 진행
model = ResNet50().to(device)
model.apply(init_weights)

ResNet(
  (conv1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (conv2_x): Sequential(
    (0): BottleNeck(
      (conv1): Sequential(
        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
      )
      (conv2): Sequential(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
      )
      (conv3): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=Tru

In [15]:
summary(model, (3,32,32), batch_size)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [256, 64, 16, 16]           9,472
       BatchNorm2d-2          [256, 64, 16, 16]             128
              ReLU-3          [256, 64, 16, 16]               0
         MaxPool2d-4            [256, 64, 8, 8]               0
            Conv2d-5            [256, 64, 8, 8]           4,096
       BatchNorm2d-6            [256, 64, 8, 8]             128
              ReLU-7            [256, 64, 8, 8]               0
            Conv2d-8            [256, 64, 8, 8]          36,864
       BatchNorm2d-9            [256, 64, 8, 8]             128
             ReLU-10            [256, 64, 8, 8]               0
           Conv2d-11           [256, 256, 8, 8]          16,384
      BatchNorm2d-12           [256, 256, 8, 8]             512
           Conv2d-13           [256, 256, 8, 8]          16,384
      BatchNorm2d-14           [256, 25

In [27]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = learning_rate, 
                      momentum=0.9, weight_decay=0.0001)

#논문에서는 60만번 중 error plateaus가 나오면 0.1 씩 곱해줬으나, 코랩에선 훈련 한계가 있어 
#30번마다 0.3 씩 곱해주는 걸로 설정
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma = 0.3)

In [28]:
#train
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data,target)  in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        logits = model(data)
        loss = criterion(logits, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (batch_idx+1)%30 == 0:
            print(f'Train Epoch: {epoch} ({100*(batch_idx+1)*batch_size/len(train_loader.dataset):.0f})%',
                  f'Loss: {loss}')
    scheduler.step()

In [29]:
#test
def test(model, device, test_loader):
    model.eval()
    test_loss=0
    correct=0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target)
            pred = output.max(1)[1]
            correct += pred.eq(target).sum().item()
        
        test_loss /= len(test_loader.dataset)
        print(f"\nTest set: Average Loss:{test_loss:.4f}, Accuracy: {100 * correct / len(test_loader.dataset)}%\n")
        print('='*50)

In [30]:
for epoch in range(1,num_epochs+1):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

Train Epoch: 1 (15)% Loss: 8.546749114990234
Train Epoch: 1 (31)% Loss: 4.19499397277832
Train Epoch: 1 (46)% Loss: 2.293015956878662
Train Epoch: 1 (61)% Loss: 2.397780656814575
Train Epoch: 1 (77)% Loss: 2.3078858852386475
Train Epoch: 1 (92)% Loss: 2.449352741241455

Test set: Average Loss:0.0132, Accuracy: 17.65%

Train Epoch: 2 (15)% Loss: 2.467991828918457
Train Epoch: 2 (31)% Loss: 2.2306644916534424
Train Epoch: 2 (46)% Loss: 2.5731008052825928
Train Epoch: 2 (61)% Loss: 2.1204001903533936
Train Epoch: 2 (77)% Loss: 2.4383256435394287
Train Epoch: 2 (92)% Loss: 2.0913021564483643

Test set: Average Loss:0.0091, Accuracy: 20.99%

Train Epoch: 3 (15)% Loss: 2.11258864402771
Train Epoch: 3 (31)% Loss: 2.135343551635742
Train Epoch: 3 (46)% Loss: 2.1017465591430664
Train Epoch: 3 (61)% Loss: 2.1499738693237305
Train Epoch: 3 (77)% Loss: 2.0656702518463135
Train Epoch: 3 (92)% Loss: 2.0645227432250977

Test set: Average Loss:0.0092, Accuracy: 24.46%

Train Epoch: 4 (15)% Loss: 1.941