# ResNet 구현하기

논문을 참고하여 ResNet을 구현해보려하고 한다.  
부스트캠프 내에서 한번 해봤는데 그 때 도움이 많이 되었다고 느껴 다시 한번 해보려고 한다.

ResNet은 가중치를 잃지 않기 위한 방법으로 Residual Learning을 이용하여 성능을 매우 향상시켰으며  
이 이후로 등장하는 네트워크들은 이와 비슷한 시도를 많이 한다.

ResidualBlock은 다음 그림을 나타내며 ResNet을 구현할 때 하나의 단위로서 이용된다.  
  
![ResidualBlock](https://miro.medium.com/max/1140/1*D0F3UitQ2l5Q0Ak-tjEdJg.png)

In [1]:
import torch
import torch.nn as nn

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [2]:
# 하나의 단위 블락

class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, strides):
        super().__init__()

        self.conv1 = nn.Sequential(
                        nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=strides[0], padding=1),
                        nn.BatchNorm2d(out_channels),
                        nn.ReLU()
                        )
                    
        self.conv2 = nn.Sequential(
                        nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=3, stride=strides[1], padding=1),
                        nn.BatchNorm2d(out_channels)
                        ) 
        
        if in_channels != out_channels:
            self.shortcut = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=2)
        else:
            self.shortcut = nn.Sequential()

        
    def forward(self, x):
        out = x
        out = self.conv1(out)
        out = self.conv2(out)
        out = out + self.shortcut(x)
        out = nn.functional.relu(out)
        return out

논문의 이미지를 보고 직관적으로 이해할 수 있도록 구현하였다.  
위에서 구현한 ResidualBlock을 이용한다.


![ResidualBlock](https://pytorch.kr/assets/images/resnet.png)

In [3]:
class ResNet34(nn.Module):
    def __init__(self, num_class):
        super().__init__()
        self.num_class= num_class
        self.layer = nn.Sequential(

            # conv1 
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=1),

            # conv2_x
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            ResidualBlock(in_channels=64, out_channels=64, strides=(1, 1)),
            ResidualBlock(in_channels=64, out_channels=64, strides=(1, 1)),
            ResidualBlock(in_channels=64, out_channels=64, strides=(1, 1)),
            

            # conv3_x
            ResidualBlock(in_channels=64, out_channels=128, strides=(2, 1)),
            ResidualBlock(in_channels=128, out_channels=128, strides=(1, 1)),
            ResidualBlock(in_channels=128, out_channels=128, strides=(1, 1)),
            ResidualBlock(in_channels=128, out_channels=128, strides=(1, 1)),

            # conv4_x
            ResidualBlock(in_channels=128, out_channels=256, strides=(2, 1)),
            ResidualBlock(in_channels=256, out_channels=256, strides=(1, 1)),
            ResidualBlock(in_channels=256, out_channels=256, strides=(1, 1)),
            ResidualBlock(in_channels=256, out_channels=256, strides=(1, 1)),
            ResidualBlock(in_channels=256, out_channels=256, strides=(1, 1)),
            ResidualBlock(in_channels=256, out_channels=256, strides=(1, 1)),

            # conv5_x
            ResidualBlock(in_channels=256, out_channels=512, strides=(2, 1)),
            ResidualBlock(in_channels=512, out_channels=512, strides=(1, 1)),
            ResidualBlock(in_channels=512, out_channels=512, strides=(1, 1)),


            # 1x1 average pool, 1000-d fc, softmax
            nn.AdaptiveAvgPool2d((1,1)),
            nn.Flatten(),
            nn.Linear(512, num_class),
            nn.Softmax()
        )


    def forward(self, x):
        return self.layer(x)

### 모델이 제대로 만들어 졌는지 확인

In [4]:
t = torch.randn(1, 3, 227, 227)

# output channel을 10으로 설정해봄
resnet = ResNet34(10)
print(resnet(t))

tensor([[0.3172, 0.0524, 0.0309, 0.0886, 0.1584, 0.0903, 0.0578, 0.0435, 0.0901,
         0.0708]], grad_fn=<SoftmaxBackward0>)


  input = module(input)


## 갖고 있는 데이터로 학습해보기

만든 모델을 가지고 있는 데이터에 대해서 학습해보자.  
나는 음식점에서 찍은 실내, 실외, 음식 3가지의 분류의 데이터를 이용해서 학습한다. 

In [5]:
import torchvision
import torchvision.transforms as T
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

transforms = T.Compose([
                        T.Resize((224,224)),
                        T.ToTensor()
                    ])

train_dataset = ImageFolder(root='./images/images_train', transform=transforms)
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)

test_dataset = ImageFolder(root='./images/images_test',transform=transforms)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=True)

print(len(train_dataset))
print(len(test_dataset))

31500
13499


### 학습 진행
1. model 정의 
2. loss 함수 정의
3. optimizer 정의
4. scheduler 정의

일반적으로 위와 같은 내용을 정의하고 학습을 진행하면 된다.

In [6]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(torch.cuda.get_device_name(0))

model = ResNet34(3).to(device)

from torch.optim import Adam
from torch.optim.lr_scheduler import MultiStepLR

criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=1e-4)
scheduler = MultiStepLR(optimizer, milestones=[28,32], gamma=0.1)

NVIDIA GeForce RTX 2070 SUPER


In [7]:
import os
from tqdm import tqdm


with open('train.log', 'w') as log:
    for Epoch in range(35):

        # var for print loss and acc
        epoch_loss = 0.
        epoch_corrects = 0
        model.train()
        for i, (img, target) in tqdm(enumerate(train_dataloader), total=len(train_dataloader)):
            img, target = img.to(device), target.to(device)

            optimizer.zero_grad()

            # 모델에 이미지를 집어넣고 결과를 받음

            out = model(img)

            # loss를 구하고 optimizer를 이용하여 기울기 변화
            loss = criterion(out, target)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            epoch_corrects += torch.sum(torch.argmax(out, dim=1) == target.data)

            scheduler.step()

        epoch_loss = epoch_loss/len(train_dataloader)
        epoch_acc = epoch_corrects/len(train_dataset)
        

        # validataion
        val_loss = 0.
        val_acc = 0.
        val_corrects = 0
        if Epoch % 5 == 0 or Epoch == 34:
            model.eval()
            for i, (img, target) in tqdm(enumerate(test_dataloader), total=len(test_dataloader)):
                img, target = img.to(device), target.to(device)
                with torch.no_grad():
                    out = model(img)
                loss = criterion(out, target)

                val_loss += loss
                val_corrects += torch.sum(torch.argmax(out, dim=1) == target.data)

            val_loss = val_loss/len(test_dataloader)
            val_acc = val_corrects/len(test_dataset)
        log.write('Epoch %d ||| train_loss: %.4f, train_acc: %.4f, valid_loss: %.4f, valid_acc: %.4f\n'%(Epoch, epoch_loss, epoch_acc, val_loss, val_acc))

  input = module(input)
100%|██████████| 493/493 [03:37<00:00,  2.27it/s]
100%|██████████| 211/211 [00:45<00:00,  4.62it/s]
100%|██████████| 493/493 [03:09<00:00,  2.60it/s]
100%|██████████| 493/493 [03:05<00:00,  2.66it/s]
100%|██████████| 493/493 [03:11<00:00,  2.57it/s]
100%|██████████| 493/493 [03:05<00:00,  2.66it/s]
100%|██████████| 493/493 [02:53<00:00,  2.84it/s]
100%|██████████| 211/211 [00:36<00:00,  5.86it/s]
100%|██████████| 493/493 [02:53<00:00,  2.84it/s]
100%|██████████| 493/493 [02:53<00:00,  2.84it/s]
100%|██████████| 493/493 [03:05<00:00,  2.66it/s]
100%|██████████| 493/493 [03:15<00:00,  2.52it/s]
100%|██████████| 493/493 [03:16<00:00,  2.50it/s]
100%|██████████| 211/211 [00:40<00:00,  5.21it/s]
100%|██████████| 493/493 [03:17<00:00,  2.49it/s]
100%|██████████| 493/493 [03:09<00:00,  2.60it/s]
100%|██████████| 493/493 [03:10<00:00,  2.59it/s]
100%|██████████| 493/493 [03:08<00:00,  2.61it/s]
100%|██████████| 493/493 [03:06<00:00,  2.65it/s]
100%|██████████| 211/211 [