# Convolutional Neural Network 모델 정의

In [1]:
import torch
import torch.nn as nn

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

cpu


In [2]:
### Create a Conv2d layer.
# Each input sample is 3-D: (channel, height, width). Arguments, in order:
#   in_channels=3  : number of channels of the input data
#   out_channels=5 : number of filters, i.e. number of output feature maps
#   kernel_size=3  : filter size; a single int means a square (3, 3) kernel
# stride=1 and padding=0 ("valid", no padding) are the defaults, spelled out here.
# padding="same" would instead pad automatically so the output size equals the input size.
layer = nn.Conv2d(3, 5, 3, stride=1, padding=0)
print(layer)

Conv2d(3, 5, kernel_size=(3, 3), stride=(1, 1))


In [5]:
# Push a dummy all-ones batch through the conv layer and check the output shape.
input_data = torch.ones(1, 3, 10, 10)  # (batch_size, channels = in_channels, height, width)
output = layer(input_data)
print(output.shape)  # (1, 5, h:8, w:8)

torch.Size([1, 5, 8, 8])


In [4]:
# Output size = (input_size - kernel_size + 2 * padding) / stride + 1
# (10 - 3 + 2*0)/1 + 1

8.0

In [6]:
# First feature map of the first sample (batch 0, output channel 0).
# Every window of the all-ones input is identical, so every entry is the same value.
output[0, 0]

tensor([[-0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446],
        [-0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446],
        [-0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446],
        [-0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446],
        [-0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446],
        [-0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446],
        [-0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446],
        [-0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446, -0.2446]],
       grad_fn=<SelectBackward0>)

In [14]:
#### Inspect the layer's weights
# layer.parameters() returns a generator that yields all weights and biases.
layer.weight.shape
# [5: number of filters, 3: filter channels, 3: filter height, 3: filter width]

torch.Size([5, 3, 3, 3])

In [15]:
# Inspect the bias - one value per filter
layer.bias.shape

torch.Size([5])

In [16]:
# 2 x 2 max pooling (kernel_size=2: size of the region a maximum is taken from).
# stride=2 matches kernel_size so the windows never overlap; this is also what the
# default stride=None resolves to.
p_layer = nn.MaxPool2d(2, 2)

In [17]:
# Max-pool a random 4 x 4 single-channel sample.
p_input = torch.randn(1, 4, 4)  # (channel: 1, height: 4, width: 4)
p_output = p_layer(p_input)

In [19]:
# A 4 x 4 input pooled with a 2 x 2 window and stride 2 is halved to 2 x 2.
p_output.shape

torch.Size([1, 2, 2])

In [20]:
# Show the pooling input so it can be compared with the pooled output.
p_input

tensor([[[-0.4206, -0.4688, -0.1028,  0.8246],
         [ 0.2590, -1.2715,  1.3346, -0.7836],
         [-0.5926, -0.6079,  1.4025, -0.3787],
         [-0.7326, -0.0742, -0.0457, -1.2658]]])

In [21]:
# Each entry is the maximum of one non-overlapping 2 x 2 window of p_input.
p_output

tensor([[[ 0.2590,  1.3346],
         [-0.0742,  1.4025]]])

In [22]:
# Same pooling layer on an odd-sized (1, 5, 5) input, which 2 x 2 windows cannot tile exactly.
p_input2 = torch.randn(1, 5, 5)
p_output2 = p_layer(p_input2)

In [23]:
# 5 x 5 pooled with a 2 x 2 window and stride 2 gives 2 x 2: the last row and column are dropped.
p_output2.shape

torch.Size([1, 2, 2])

In [24]:
# Show the 5 x 5 pooling input.
p_input2

tensor([[[ 0.8441,  0.6666, -0.7669, -0.6433,  0.2646],
         [ 0.1071, -0.6281, -0.4712,  0.8907, -0.7145],
         [ 1.1848, -0.6985, -0.4843, -1.1012, -0.0762],
         [ 2.8585, -1.5706,  1.3642, -1.5024,  0.1902],
         [-0.8840, -0.4332,  1.2828, -1.0102,  0.8058]]])

In [25]:
# Maxima of the four complete 2 x 2 windows; the fifth row and column do not contribute.
p_output2

tensor([[[0.8441, 0.8907],
         [2.8585, 1.3642]]])

In [26]:
# Regions smaller than the kernel are not pooled (they are dropped). Padding
# extends the input so that those partial border regions are pooled as well.
p_layer2 = nn.MaxPool2d(
    kernel_size=2, stride=2,
    padding=1  # the 5 x 5 input now yields a 3 x 3 output instead of 2 x 2
)
p_output3 = p_layer2(p_input2)
p_output3.shape

torch.Size([1, 3, 3])

In [27]:
# Show the 5 x 5 input again for comparison with the padded pooling result.
p_input2

tensor([[[ 0.8441,  0.6666, -0.7669, -0.6433,  0.2646],
         [ 0.1071, -0.6281, -0.4712,  0.8907, -0.7145],
         [ 1.1848, -0.6985, -0.4843, -1.1012, -0.0762],
         [ 2.8585, -1.5706,  1.3642, -1.5024,  0.1902],
         [-0.8840, -0.4332,  1.2828, -1.0102,  0.8058]]])

In [29]:
# Result of pooling with padding=1: every row and column of p_input2 now falls inside some window.
p_output3

tensor([[[ 0.8441,  0.6666,  0.2646],
         [ 1.1848, -0.4712,  0.8907],
         [ 2.8585,  1.3642,  0.8058]]])

# MNIST

In [1]:
import os

import torch
from torch import nn
from torchinfo import summary

import matplotlib.pyplot as plt
import numpy as np

from module.data import load_mnist_dataset, load_fashion_mnist_dataset
from module.train import fit
from module.utils import plot_fit_result

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

cpu


In [4]:
### Hyperparameters
epochs: int = 1         # number of passes over the training set
batch_size: int = 256   # samples per mini-batch
lr: float = 0.001       # optimizer learning rate

## Data 준비

In [45]:
## Dataset -> DataLoader
# load_mnist_dataset comes from the project's module.data package.
data_path = r"C:\Classes\deeplearning\datasets"
train_loader = load_mnist_dataset(data_path, batch_size)
# NOTE(review): the positional False is presumably is_train=False (test split) — confirm against module.data.
test_loader = load_mnist_dataset(data_path, batch_size, False)

## CNN 모델 정의

In [5]:
# Conv (layer) block variants
#  1. Conv + ReLU + MaxPooling, (Conv + ReLU + Conv + ReLU + MaxPooling)
#  2. Conv + BatchNorm + ReLU + MaxPooling  (BatchNorm goes between Conv and the activation)
#  3. Conv + ReLU + Dropout + MaxPooling     (Dropout goes after the activation)
#  4. Conv + BatchNorm + ReLU + Dropout + MaxPooling
#
#  CNN: build the network so that the number of filters (channel depth) grows while the size (height, width) shrinks.
####    Depth is increased by the convolution layers; size is reduced by max pooling.
import torch.nn as nn

class MnistCNNModel(nn.Module):
    """CNN classifier for single-channel 28 x 28 MNIST images.

    Three Conv -> BatchNorm -> ReLU -> Dropout -> MaxPool blocks widen the
    channels (1 -> 32 -> 64 -> 128) while shrinking the feature map
    (28 -> 14 -> 7 -> 4). A two-layer fully connected classifier then maps the
    flattened features to 10 class logits (digits 0 ~ 9).
    """

    @staticmethod
    def _conv_block(in_ch, out_ch, dropout_rate, pool_padding=0):
        """Build one conv block.

        Conv uses a 3 x 3 kernel, stride 1 and "same" padding, so only the
        2 x 2 / stride-2 max pooling changes the spatial size.
        """
        layers = [
            nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding="same"),
            nn.BatchNorm2d(out_ch),  # normalizes the conv output channels
            nn.ReLU(),
            nn.Dropout(p=dropout_rate),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=pool_padding),
        ]
        return nn.Sequential(*layers)

    def __init__(self, dropout_rate):
        super().__init__()
        # Feature extractor, one attribute per conv block.
        self.b1 = self._conv_block(1, 32, dropout_rate)
        self.b2 = self._conv_block(32, 64, dropout_rate)
        # The 7 x 7 input of the last pooling is padded by 1 so it pools down to 4 x 4.
        self.b3 = self._conv_block(64, 128, dropout_rate, pool_padding=1)

        # Classifier head: conv output is 3-D per sample, Linear expects 1-D.
        flat_features = 128 * 4 * 4
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=flat_features, out_features=256),
            nn.ReLU(),
            nn.Dropout(p=dropout_rate),
            nn.Linear(256, 10),  # 10 output classes (0 ~ 9)
        )

    def forward(self, X):
        """Return the class logits for a batch X of shape (batch, 1, 28, 28)."""
        features = X
        for block in (self.b1, self.b2, self.b3):
            features = block(features)
        return self.classifier(features)

In [6]:
# Build a throwaway model and print a per-layer summary for one batch of 1 x 28 x 28 inputs.
cnn_mnist_model = MnistCNNModel(dropout_rate=0.5)
summary(cnn_mnist_model, input_size=(batch_size, 1, 28, 28))

Layer (type:depth-idx)                   Output Shape              Param #
MnistCNNModel                            [256, 10]                 --
├─Sequential: 1-1                        [256, 32, 14, 14]         --
│    └─Conv2d: 2-1                       [256, 32, 28, 28]         320
│    └─BatchNorm2d: 2-2                  [256, 32, 28, 28]         64
│    └─ReLU: 2-3                         [256, 32, 28, 28]         --
│    └─Dropout: 2-4                      [256, 32, 28, 28]         --
│    └─MaxPool2d: 2-5                    [256, 32, 14, 14]         --
├─Sequential: 1-2                        [256, 64, 7, 7]           --
│    └─Conv2d: 2-6                       [256, 64, 14, 14]         18,496
│    └─BatchNorm2d: 2-7                  [256, 64, 14, 14]         128
│    └─ReLU: 2-8                         [256, 64, 14, 14]         --
│    └─Dropout: 2-9                      [256, 64, 14, 14]         --
│    └─MaxPool2d: 2-10                   [256, 64, 7, 7]           --
├─Sequent

In [7]:
# Print the module tree (layers and their hyperparameters).
print(cnn_mnist_model)

MnistCNNModel(
  (b1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.5, inplace=False)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (b2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.5, inplace=False)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (b3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.5, inplace=False)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (classi

## Train

In [41]:
# Model: created with dropout 0.3 and moved to the selected device
model = MnistCNNModel(dropout_rate=0.3)
model = model.to(device)
# Loss: cross entropy for multi-class classification
loss_fn = nn.CrossEntropyLoss()
# Optimizer: Adam over all model parameters
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

In [47]:
# fit(): train the model (fit comes from the project's module.train package).
# NOTE(review): judging by the printed log, fit() reports per-epoch train/validation metrics
# and saves to save_model_path when the validation loss improves — confirm in module.train.
save_path = "saved_models/mnist_cnn_model.pth"
result = fit(train_loader, test_loader, model, loss_fn, optimizer, epochs, 
               save_model_path=save_path, device=device, mode="multi")

Epoch[1/1] - Train loss: 0.29476 Train Accucracy: 0.92820 || Validation Loss: 0.28594 Validation Accuracy: 0.93030
<<<<<<<저장: 1 - 이전 : inf, 현재: 0.2859438624233007
49.173094272613525 초


In [43]:
# Check the number of epochs currently configured.
epochs

1

### colab에서 학습한 모델을 이용해서 검증 및 추론

In [41]:
import torch
import torch.nn as nn
from torchvision import transforms
from module.data import load_mnist_dataset
from module.train import test_multi_classification

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [43]:
# Load the trained model.
# The checkpoint holds the whole pickled nn.Module (not just a state_dict), so:
#  - the MnistCNNModel class must be defined/importable before this call;
#  - weights_only=False is required on PyTorch >= 2.6, where torch.load defaults to
#    weights_only=True and refuses to unpickle arbitrary classes.
# SECURITY: unpickling can execute arbitrary code — only load checkpoints you trust.
cnn_model = torch.load('saved_models/mnist_cnn_model.pth',
                       map_location=device,  # remap CUDA-trained tensors onto the available device
                       weights_only=False)
# Inference mode: disables Dropout and makes BatchNorm use its running statistics.
cnn_model.eval()
cnn_model

MnistCNNModel(
  (b1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (b2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (b3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (classi

In [44]:
### Load the test set and evaluate the model
test_loader = load_mnist_dataset(r"C:\Classes\deeplearning\datasets",
                                 batch_size=256, is_train=False)
# Evaluate on `device`, where the model lives; a hard-coded "cpu" would
# mismatch a CUDA-resident model on a GPU machine.
loss, acc = test_multi_classification(test_loader, cnn_model, nn.CrossEntropyLoss(), device)
print(f"Loss: {loss}, Accuracy: {acc}")

Loss: 0.039898493385408074, Accuracy: 0.991


In [14]:
##### Inference on new images
from glob import glob
# glob returns matches in arbitrary, OS-dependent order; sort for a reproducible listing.
img_path = sorted(glob('test_img/num/*.png'))
img_path

['test_img/num\\eight.png',
 'test_img/num\\eight2.png',
 'test_img/num\\five.png',
 'test_img/num\\four.png',
 'test_img/num\\one.png',
 'test_img/num\\seven.png',
 'test_img/num\\seven2.png',
 'test_img/num\\three.png',
 'test_img/num\\three2.png',
 'test_img/num\\two.png',
 'test_img/num\\two2.png']

In [47]:
# Single-file test
import cv2
file_path = 'test_img/num/eight.png'  # forward slashes work on Windows and POSIX alike
img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
if img is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise FileNotFoundError(f"cannot read image: {file_path}")
# Preprocessing: grayscale array -> tensor with a leading channel dimension -> resized to 28 x 28
tf = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((28, 28))
])
print(img.shape)
# Add the batch dimension and put the input on the same device as the model.
input_data = tf(img).unsqueeze(dim=0).to(device)
print(input_data.shape)
cnn_model.eval()  # disable Dropout; BatchNorm uses its running statistics
with torch.no_grad():  # gradients are not needed for inference
    result = cnn_model(input_data)

(48, 51)
torch.Size([1, 1, 28, 28])


In [36]:
# Predicted class = index of the largest logit along the last (class) dimension.
result.argmax(dim=-1)

tensor([8])

In [45]:
# Run inference on every test image and print the predicted digit with its softmax confidence.
to_tensor = transforms.ToTensor()  # hoisted: the transform is the same for every image
cnn_model.eval()                   # hoisted: the mode only needs to be set once
with torch.no_grad():              # gradients are not needed for inference
    for file_path in img_path:
        img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread reports failure by returning None; skip instead of crashing in resize.
            print(f"{file_path}: skipped (could not be read as an image)")
            continue
        img = cv2.resize(img, (28, 28))
        # Preprocessing: add the batch dimension and match the model's device.
        input_data = to_tensor(img).unsqueeze(dim=0).to(device)
        # Bring the logits back to the CPU so the printed tensors look the same on any device.
        result = cnn_model(input_data).cpu()
        result_proba = result.softmax(dim=-1)
        print(f"{file_path}: {result.argmax(dim=-1)[0]} - {result_proba.max(-1).values}")

test_img/num\eight.png: 8 - tensor([0.9763])
test_img/num\eight2.png: 8 - tensor([0.9797])
test_img/num\five.png: 5 - tensor([0.7668])
test_img/num\four.png: 4 - tensor([0.9993])
test_img/num\one.png: 1 - tensor([0.6340])
test_img/num\seven.png: 7 - tensor([0.4977])
test_img/num\seven2.png: 7 - tensor([0.7079])
test_img/num\three.png: 3 - tensor([0.9382])
test_img/num\three2.png: 3 - tensor([0.9778])
test_img/num\two.png: 2 - tensor([0.9907])
test_img/num\two2.png: 2 - tensor([0.3904])
