In [1]:
from pprint import pprint

import torch
import torch.nn as nn
import torch.nn.functional as F

## Lesson 5 - Model
 - 이번 실습 자료에서는 강의시간에 다루었던 파이토치 모델을 정의하는 방법에 대해 실습하겠습니다.
 - 파이토치 모델은 기본적으로 `nn.Module` 클래스를 상속하여 사용합니다.
     - [공식문서](https://pytorch.org/docs/stable/generated/torch.nn.Module.html)에 따르면 `nn.Module` 은 다음과 같은 기능을 합니다
     ```
     Base class for all neural network modules.
     Your models should also subclass this class.
     Modules can also contain other Modules, allowing to nest them in a tree structure. You can assign the submodules as regular attributes:
     ```

In [2]:
class Model(nn.Module):
    """Small two-convolution network used to demonstrate ``nn.Module`` features.

    Submodules, in registration order:
      * ``conv1`` - 3x3 convolution, 1 -> 3 channels, with a bias term
      * ``bn1``   - batch normalization over the 3 intermediate channels
      * ``conv2`` - 3x3 convolution, 3 -> 5 channels, without a bias term
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 3, kernel_size=3, bias=True)
        self.bn1 = nn.BatchNorm2d(3)
        self.conv2 = nn.Conv2d(3, 5, kernel_size=3, bias=False)

    def forward(self, x):
        """Apply conv1 -> bn1 -> ReLU -> conv2 -> ReLU to ``x`` and return the result."""
        hidden = self.conv1(x)
        hidden = self.bn1(hidden)
        hidden = F.relu(hidden)
        hidden = self.conv2(hidden)
        return F.relu(hidden)

In [3]:
# Instantiate the network defined above.
model = Model()
# Bare last expression: the notebook displays the module's repr (its registered submodules).
model

Model(
  (conv1): Conv2d(1, 3, kernel_size=(3, 3), stride=(1, 1))
  (bn1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(3, 5, kernel_size=(3, 3), stride=(1, 1), bias=False)
)

### 모델 디버깅
 - 파이토치 모델들은 다음과 같은 방법들을 통해 파라미터를 눈으로 확인할 수 있습니다.

In [4]:
# 1. using named_parameters()
# Walk over every (name, tensor) pair registered on the model and dump it,
# closing each entry with a ruler line followed by an empty line.
ruler = "-" * 100
for param, weight in model.named_parameters():
    print(f"{param:20} - size: {weight.size()}")
    print(weight, ruler, "", sep="\n")

conv1.weight         - size: torch.Size([3, 1, 3, 3])
Parameter containing:
tensor([[[[ 0.0798,  0.0982, -0.0689],
          [ 0.2807,  0.1054, -0.0321],
          [ 0.2718, -0.3276, -0.1768]]],


        [[[-0.3259,  0.1706,  0.2598],
          [-0.0917,  0.1226, -0.2796],
          [ 0.1066, -0.1066,  0.1998]]],


        [[[ 0.1872,  0.3121,  0.2176],
          [-0.2487,  0.3190, -0.2874],
          [-0.3131, -0.3031, -0.1055]]]], requires_grad=True)
----------------------------------------------------------------------------------------------------

conv1.bias           - size: torch.Size([3])
Parameter containing:
tensor([ 0.3161,  0.2642, -0.1567], requires_grad=True)
----------------------------------------------------------------------------------------------------

bn1.weight           - size: torch.Size([3])
Parameter containing:
tensor([1., 1., 1.], requires_grad=True)
----------------------------------------------------------------------------------------------------

bn1.b

In [5]:
# 2. directly access with member variable
# Submodules are plain attributes of the model, so their tensors are reachable
# through ordinary dotted access.
first_conv = model.conv1
print(first_conv.weight, first_conv.bias, sep="\n")

Parameter containing:
tensor([[[[ 0.0798,  0.0982, -0.0689],
          [ 0.2807,  0.1054, -0.0321],
          [ 0.2718, -0.3276, -0.1768]]],


        [[[-0.3259,  0.1706,  0.2598],
          [-0.0917,  0.1226, -0.2796],
          [ 0.1066, -0.1066,  0.1998]]],


        [[[ 0.1872,  0.3121,  0.2176],
          [-0.2487,  0.3190, -0.2874],
          [-0.3131, -0.3031, -0.1055]]]], requires_grad=True)
Parameter containing:
tensor([ 0.3161,  0.2642, -0.1567], requires_grad=True)


### 학습된 모델 저장하기
 - `torch.save(model.state_dict(), save_path)`

In [6]:
import os

# Folder that will hold the checkpoint; it is created first (no error if it already exists).
save_folder = "./runs/"
os.makedirs(save_folder, exist_ok=True)
save_path = os.path.join(save_folder, "best.pth")  # ./runs/best.pth

# Serialize the model's state_dict (not the Model object itself) to save_path.
weights = model.state_dict()
torch.save(weights, save_path)

print(f"Model saving success at {save_path}")
print(f"Saved models : {os.listdir(save_folder)}")

Model saving success at ./runs/best.pth
Saved models : ['best.pth']


### 저장된 모델 불러오기
 - `model.load_state_dict(torch.load(save_path))`

In [7]:
# Build a freshly initialized network, then overwrite its state with the
# contents of the checkpoint file written earlier.
new_model = Model()
checkpoint = torch.load(save_path)
new_model.load_state_dict(checkpoint)
print(f"Model loading success from {save_path}")

Model loading success from ./runs/best.pth


#### 저장된 모델이 잘 불러와졌는지 확인해봅시다

In [8]:
# Pair up the parameters of both models in iteration order and test each pair
# for exact equality with torch.equal.
paired_parameters = zip(model.named_parameters(), new_model.named_parameters())
for (name, original_tensor), (_, restored_tensor) in paired_parameters:
    is_equal = torch.equal(original_tensor, restored_tensor)
    print(f"parameter {name:15} from trained model and loaded model is equal? -> {is_equal}")

parameter conv1.weight    from trained model and loaded model is equal? -> True
parameter conv1.bias      from trained model and loaded model is equal? -> True
parameter bn1.weight      from trained model and loaded model is equal? -> True
parameter bn1.bias        from trained model and loaded model is equal? -> True
parameter conv2.weight    from trained model and loaded model is equal? -> True


#### state_dict() 이 무엇인가요?
 - 모델의 저장과 로딩에 `state_dict()` 을 사용하는데, 기능이 무엇인가요?
 - 기본적으로 위에서 살펴본 `.named_parameters()` 와 매우 유사합니다
 - model parameter 를 Key 로 가지고, model weights 를 Value 로 가지는 파이썬 딕셔너리일 뿐입니다. 
   (정확한 Type 은 파이썬 내장 라이브러리의 `collections.OrderedDict` 입니다)

In [9]:
# Dump every entry of the state_dict mapping: key and size, the tensor itself,
# then a ruler line.
ruler = "-" * 100
for param, weight in model.state_dict().items():
    print(f"{param:20} - size: {weight.size()}", weight, ruler, sep="\n")

conv1.weight         - size: torch.Size([3, 1, 3, 3])
tensor([[[[ 0.0798,  0.0982, -0.0689],
          [ 0.2807,  0.1054, -0.0321],
          [ 0.2718, -0.3276, -0.1768]]],


        [[[-0.3259,  0.1706,  0.2598],
          [-0.0917,  0.1226, -0.2796],
          [ 0.1066, -0.1066,  0.1998]]],


        [[[ 0.1872,  0.3121,  0.2176],
          [-0.2487,  0.3190, -0.2874],
          [-0.3131, -0.3031, -0.1055]]]])
----------------------------------------------------------------------------------------------------
conv1.bias           - size: torch.Size([3])
tensor([ 0.3161,  0.2642, -0.1567])
----------------------------------------------------------------------------------------------------
bn1.weight           - size: torch.Size([3])
tensor([1., 1., 1.])
----------------------------------------------------------------------------------------------------
bn1.bias             - size: torch.Size([3])
tensor([0., 0., 0.])
--------------------------------------------------------------------

In [10]:
from collections import OrderedDict

print(f"model.state_dict() type is : {type(model.state_dict())}")
# Last expression of the cell: True only when the exact type is OrderedDict.
state_dict_type = type(model.state_dict())
state_dict_type is OrderedDict

model.state_dict() type is : <class 'collections.OrderedDict'>


True

#### `named_parameters()` 을 안쓰고 `state_dict()` 을 사용하는 이유가 무엇인가요? (둘이 뭐가 다른가요)
 - `named_parameters()` : returns only parameters
 - `state_dict()`: returns both parameters and buffers (e.g. BN `running_mean`, `running_var`)
 
 [Reference](https://stackoverflow.com/a/54747245)

In [11]:
# named_parameters(): returns only parameters
parameter_names = [name for name, _ in model.named_parameters()]
pprint(parameter_names)
print()
# state_dict(): returns both parameters and buffers
pprint(list(model.state_dict()))

['conv1.weight', 'conv1.bias', 'bn1.weight', 'bn1.bias', 'conv2.weight']

['conv1.weight',
 'conv1.bias',
 'bn1.weight',
 'bn1.bias',
 'bn1.running_mean',
 'bn1.running_var',
 'bn1.num_batches_tracked',
 'conv2.weight']


### CPU vs GPU
 - DL 모델은 다양한 프로세서(CPU, GPU, TPU) 를 사용하여 학습을 할 수 있습니다.
 - 따라서, 특정 프로세서에서 학습을 진행하고 싶은 경우 명시적으로 지정해주어야 합니다.

#### cpu()
Moves all model parameters and buffers to the CPU.

In [12]:
# cpu() hands back the module, so the per-parameter device report is chained onto it.
for weight in model.cpu().parameters():
    print(f"model device: {weight.device}")

model device: cpu
model device: cpu
model device: cpu
model device: cpu
model device: cpu


#### cuda()
Moves all model parameters and buffers to the GPU.

In [13]:
# cuda() hands back the module, so the per-parameter device report is chained onto it.
# NOTE(review): this cell needs a CUDA-capable environment to run.
for weight in model.cuda().parameters():
    print(f"model device: {weight.device}")

model device: cuda:0
model device: cuda:0
model device: cuda:0
model device: cuda:0
model device: cuda:0


#### to()
Moves and/or casts the parameters and buffers

In [14]:
# Devices to visit, in order. `device` remains bound to the last one ('cuda')
# after the loop finishes, and the forward-pass cell below reads it.
device_options = ['cpu', 'cuda']
for device_option in device_options:
    device = torch.device(device_option)
    # Move the model onto the device selected for this iteration.
    model.to(device)
    
    print(f"Set model device to {device_option}")
    # Report the device of every parameter tensor after the move.
    for weight in model.parameters():
        print(f"model device: {weight.device}")
    print()

Set model device to cpu
model device: cpu
model device: cpu
model device: cpu
model device: cpu
model device: cpu

Set model device to cuda
model device: cuda:0
model device: cuda:0
model device: cuda:0
model device: cuda:0
model device: cuda:0



### forward
 - nn.Module 을 상속한 객체를 직접 호출할 때 수행하는 연산을 정의합니다.
 - `model(input)` 을 통해 모델의 예측값을 계산할 수 있습니다.
 - Defines the computation performed at every call

In [15]:
# Random tensor of size (1, 1, 12, 12), moved onto `device`
# (the variable left over from the device loop in the previous cell).
dummy_input = torch.randn(1, 1, 12, 12).to(device)
# Put the model on the same device as the input before calling it.
model.to(device)
# Calling the module object runs the computation defined in Model.forward.
output = model(dummy_input)
print(f"model output: {output.size()}")
# Bare last expression: the notebook displays the output tensor.
output

model output: torch.Size([1, 5, 8, 8])


tensor([[[[0.0000e+00, 2.2994e-01, 1.3038e-01, 2.1326e-01, 0.0000e+00,
           8.5100e-02, 0.0000e+00, 1.1047e-01],
          [1.1781e-03, 0.0000e+00, 0.0000e+00, 1.6044e-01, 0.0000e+00,
           0.0000e+00, 3.4512e-02, 0.0000e+00],
          [4.2940e-02, 0.0000e+00, 1.4471e-01, 0.0000e+00, 6.1308e-01,
           0.0000e+00, 4.1884e-01, 0.0000e+00],
          [0.0000e+00, 4.0171e-01, 1.8243e-01, 4.8579e-01, 4.6658e-02,
           1.9321e-01, 1.8018e-02, 1.6586e-01],
          [5.3419e-02, 0.0000e+00, 3.5406e-02, 0.0000e+00, 0.0000e+00,
           3.0990e-01, 0.0000e+00, 3.6281e-01],
          [2.6523e-01, 5.3182e-02, 5.3142e-02, 1.4562e-01, 1.3132e-02,
           4.0218e-01, 2.0792e-01, 0.0000e+00],
          [1.2249e-02, 0.0000e+00, 7.8594e-02, 7.2349e-02, 0.0000e+00,
           3.8808e-02, 0.0000e+00, 1.6116e-01],
          [0.0000e+00, 1.3675e-01, 0.0000e+00, 0.0000e+00, 1.5735e-01,
           1.0406e-01, 1.3388e-01, 0.0000e+00]],

         [[2.5242e-01, 4.1846e-01, 3.7824e-02,

#### Cautions
 - 모델과 인풋의 device 는 반드시 같아야 합니다.
 - 그렇지 않으면 (Runtime) Error 가 발생합니다.

In [16]:
# Device handles reused by this cell and the following one.
cpu_device = torch.device('cpu')
gpu_device = torch.device('cuda')

# device is same
# Both the input and the model are moved to the GPU device before the forward pass.
dummy_input = dummy_input.to(gpu_device)
model.to(gpu_device)
output = model(dummy_input)  # Fine: input and model are on the same device
print(f"model output: {output.size()}")

model output: torch.Size([1, 5, 8, 8])


In [17]:
# Deliberately put the input on the CPU while the model stays on the GPU.
dummy_input = dummy_input.to(cpu_device)
model.to(gpu_device)

# device is different
# The forward pass is expected to fail with a RuntimeError. The error is caught
# and printed so the failure is still demonstrated, while "Restart Kernel ->
# Run All" can continue to the cells below instead of stopping here.
try:
    output = model(dummy_input)  # device mismatch -> RuntimeError
except RuntimeError as error:
    # e.g. Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same
    print(f"RuntimeError: {error}")
else:
    # Reached only if the forward pass unexpectedly succeeds.
    print(f"model output: {output.size()}")

RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same

### requires_grad_()
 - autograd 가 해당 모델의 연산을 기록할지를 결정합니다
 - `False` 일 시, 수행하는 연산을 기록하지 않고 따라서 역전파가 되지 않아 학습에서 제외됩니다.
 - Change if autograd should record operations on parameters in this module.

In [18]:
# requires_grad = False
# Turn the requires_grad flag off on the whole model, then list the flag of
# every named parameter to confirm the change.
model.requires_grad_(False)
for param, weight in model.named_parameters():
    print(f"param {param:15} required gradient? -> {weight.requires_grad}")

param conv1.weight    required gradient? -> False
param conv1.bias      required gradient? -> False
param bn1.weight      required gradient? -> False
param bn1.bias        required gradient? -> False
param conv2.weight    required gradient? -> False


In [19]:
# requires_grad = True
# Turn the requires_grad flag back on for the whole model, then list the flag
# of every named parameter to confirm the change.
model.requires_grad_(True)
for param, weight in model.named_parameters():
    print(f"param {param:15} required gradient? -> {weight.requires_grad}")

param conv1.weight    required gradient? -> True
param conv1.bias      required gradient? -> True
param bn1.weight      required gradient? -> True
param bn1.bias        required gradient? -> True
param conv2.weight    required gradient? -> True


### train(), eval()
 - 모델을 training(evaluation) 모드로 전환합니다.
 - training 과 evaluation 이 다르게 작용하는 모듈들(Dropout, BatchNorm) 에 영향을 줍니다.
 - 학습 단계에서는 training 모드로, 인퍼런스 단계에서는 eval 모드로 전환해주어야 합니다.
 - [아래](https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/batchnorm.py#L111-L118)는 BatchNorm2d 의 파이토치 구현입니다. `self.training=True` 일 경우에만, `running_mean`, `running_var` 을 tracking 합니다.
 
```
if self.training and self.track_running_stats:
    # TODO: if statement only here to tell the jit to skip emitting this when it is None
    if self.num_batches_tracked is not None:
        self.num_batches_tracked = self.num_batches_tracked + 1
        if self.momentum is None:  # use cumulative moving average
            exponential_average_factor = 1.0 / float(self.num_batches_tracked)
        else:  # use exponential moving average
            exponential_average_factor = self.momentum
```

In [20]:
# Calling train() on the top-level model also flips the `training` flag of the
# bn1 submodule, which the print below reads back.
model.train()  # set model to train mode
print(f"model.bn1.training: {model.bn1.training}")

model.bn1.training: True


In [21]:
# Calling eval() on the top-level model also flips the `training` flag of the
# bn1 submodule, which the print below reads back.
model.eval()  # set model to eval mode
print(f"model.bn1.training: {model.bn1.training}")

model.bn1.training: False


### 파이토치 공식 문서에서 nn.Module 에 관한 더 많은 정보를 얻을 수 있습니다.
https://pytorch.org/docs/stable/generated/torch.nn.Module.html

궁금증이 생기면 공식 문서를 참고하는 것을 강력히 추천합니다.