# 신경망 모델 구성

- `torch.nn`: 신경망을 구성하는 데 필요한 모든 구성 요소를 제공
- PyTorch의 모든 모듈은 `nn.Module`의 subclass

In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# Pick the compute backend in priority order:
# CUDA GPU first, then Apple Silicon (MPS), and CPU as the fallback.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f"Using {device} device")

Using cpu device


## 클래스 정의
- 신경망 모델을 `nn.Module`의 subclass로 정의
- `__init__`에서 신경망 계층 초기화
- `nn.Module`을 상속받은 모든 클래스는 `forward` 메소드에 input에 대한 연산을 구현

In [3]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: Flatten -> Linear -> ReLU -> Linear -> ReLU -> Linear.

    The layer sizes are configurable; the defaults reproduce the original
    784 -> 512 -> 512 -> 10 network used throughout this file.

    Args:
        in_features: Number of values per sample after flattening
            (default ``28 * 28``).
        hidden_features: Width of both hidden layers (default ``512``).
        num_classes: Number of output logits per sample (default ``10``).
    """

    def __init__(
        self,
        in_features: int = 28 * 28,
        hidden_features: int = 512,
        num_classes: int = 10,
    ) -> None:
        super().__init__()
        # Counterpart of tensorflow.keras's Flatten(): keeps dim 0 and merges
        # every remaining dimension into one.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Flatten ``x`` per sample and return the raw (unnormalized) logits."""
        flat = self.flatten(x)
        return self.linear_relu_stack(flat)

In [4]:
# Create a NeuralNetwork instance, then move it to the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

# Caution: never call model.forward() directly; invoke the model itself, e.g. model(x).

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [5]:
# Feed one random 28x28 input through the model and pick the highest-probability class.
X = torch.rand((1, 28, 28), device=device)
logits = model(X)  # raw scores, one row per sample
pred_probab = nn.Softmax(dim=1)(logits)  # normalize each row into probabilities
y_pred = torch.argmax(pred_probab, dim=1)

print(f"Predicted class: {y_pred}")

Predicted class: tensor([0])


## 모델 분해하여 살펴보기

In [6]:
# A mini-batch of 3 random 28x28 "images" used to walk through the layers one by one.
input_image = torch.rand((3, 28, 28))
print(input_image.shape)

torch.Size([3, 28, 28])


In [7]:
## nn.Flatten
# Keeps the leading (batch) dimension and merges the rest: (3, 28, 28) -> (3, 784).

flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


In [8]:
## nn.Linear
# Applies a learned linear map to each flattened sample: (3, 784) -> (3, 20).

layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


In [9]:
## nn.ReLU
# Print the activations before and after ReLU; negative entries become 0.

print(f"Before ReLU: {hidden1}\n\n")

# Out-of-place ReLU (the default): the result is rebound to the same name.
hidden1 = nn.ReLU(inplace=False)(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[ 0.4266, -0.0634,  0.4144, -0.1885, -0.7122,  0.7277,  0.1327, -0.4899,
          0.0196, -0.3434, -0.3943, -0.1615,  0.2107, -0.1986,  0.4113,  0.2516,
          0.2046, -0.3745, -0.5222, -0.0390],
        [ 0.3508, -0.0217,  0.1415, -0.3540, -0.4608,  0.2962, -0.2919, -0.2693,
          0.2231, -0.0701, -0.3881, -0.4105,  0.0309,  0.1489,  0.0110, -0.1141,
          0.1526, -0.3551, -0.4311, -0.0766],
        [ 0.7970, -0.0447,  0.1552, -0.0569, -0.3590,  0.4634,  0.1966, -0.2421,
         -0.0304, -0.1946, -0.4883, -0.5455,  0.0340, -0.1611,  0.0759, -0.1331,
          0.1301, -0.2865, -0.5892, -0.2554]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.4266, 0.0000, 0.4144, 0.0000, 0.0000, 0.7277, 0.1327, 0.0000, 0.0196,
         0.0000, 0.0000, 0.0000, 0.2107, 0.0000, 0.4113, 0.2516, 0.2046, 0.0000,
         0.0000, 0.0000],
        [0.3508, 0.0000, 0.1415, 0.0000, 0.0000, 0.2962, 0.0000, 0.0000, 0.2231,
         0.0000, 0.0000, 0.0000, 0.0309, 0.1489, 0.01

In [10]:
## nn.Sequential
# Chains modules in order; data flows through them exactly as listed.
# The flatten and layer1 instances created earlier are reused here.

seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(in_features=20, out_features=10),
)
input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)

In [11]:
## nn.Softmax
# Normalizes the logits along dim 1, i.e. across the outputs of each sample.

softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)

In [12]:
## Model parameters

print(f"Model structure: {model}\n\n")

# named_parameters() yields (name, tensor) pairs; print each parameter's name,
# its size, and its first two entries along dim 0.
for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values: {param[:2]}\n")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values: tensor([[-0.0144,  0.0214, -0.0031,  ..., -0.0049, -0.0084, -0.0256],
        [-0.0052,  0.0121, -0.0244,  ...,  0.0221,  0.0217,  0.0284]],
       grad_fn=<SliceBackward0>)

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values: tensor([0.0314, 0.0004], grad_fn=<SliceBackward0>)

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values: tensor([[ 0.0295, -0.0042,  0.0373,  ..., -0.0420, -0.0234,  0.0360],
        [ 0.0378,  0.0205,  0.0280,  ..., -0.0271, -0.0351,  0.0242]],
       grad_fn=<SliceBackward0>)

Layer: linear_relu_stack.2.bias | Size: to