In [3]:
# 12일차 코드
import torch
from torch import nn

import torchvision
from torchvision import datasets
from torchvision.transforms import ToTensor

import matplotlib.pyplot as plt

# FashionMNIST training split, stored under ./data, with images converted to tensors.
train_data = datasets.FashionMNIST(root="data",
                                   train=True,
                                   download=True,
                                   transform=ToTensor(),
                                   target_transform=None)

# Matching test split (train=False).
test_data = datasets.FashionMNIST(root="data",
                                  train=False,
                                  download=True,
                                  transform=ToTensor())

# Class labels as exposed by the dataset object.
class_names = train_data.classes

from torch.utils.data import DataLoader

BATCH_SIZE = 32

# Turn each dataset into an iterable of mini-batches.
# Only the training batches are shuffled; the test data does not need to be.
train_dataloader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False)

class FashionMNISTModelV0(nn.Module):
    """Baseline classifier: flatten the input, then apply two stacked linear layers.

    Args:
        input_shape: Number of features in one flattened sample
            (e.g. 784 for a 28*28 image turned into a [784] vector).
        hidden_units: Number of output features of the first linear layer.
        output_shape: Number of output features of the final linear layer.
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        # Kept positional (no layer names) so parameters stay addressable as
        # ``layer_stack.<index>.*``.
        layers = [
            nn.Flatten(),  # the linear layers expect their inputs in vector form
            nn.Linear(in_features=input_shape, out_features=hidden_units),
            nn.Linear(in_features=hidden_units, out_features=output_shape),
        ]
        self.layer_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through ``layer_stack`` and return the result."""
        output = self.layer_stack(x)
        return output
    
# input_shape = 784 : this is how many features go into the model (in our case, one for every pixel in the target image)
# hidden_units = 10 : number of units/neurons in the hidden layer(s)
# output_shape = len(class_names) : since this is a multi-class classification problem, we need one output neuron per class in our dataset

# Seed before construction so the randomly initialised weights are reproducible.
torch.manual_seed(42)

model_0 = FashionMNISTModelV0(
    input_shape=784,
    hidden_units=10,
    output_shape=len(class_names),  # one output for every class
)
model_0.to("cpu")


from helper_functions import accuracy_fn

# The loss function is also referred to as the criterion or cost function.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_0.parameters(), lr=0.1)

from timeit import default_timer as timer
def print_train_time(start: float, end: float, device: torch.device = None):
    """Print and return the elapsed time between two timestamps.

    Args:
        start: Start timestamp.
        end: End timestamp.
        device: Value interpolated into the printed message (defaults to None).

    Returns:
        The difference ``end - start``.
    """
    total_time = end - start
    message = f"Train time on {device} : {total_time:.3f} seconds"
    print(message)
    return total_time

In [4]:
# Make predictions and get Model 0 results
import torch
from torch import nn

torch.manual_seed(42)
def eval_model(model: torch.nn.Module,
               data_loader : torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               accuracy_fn):
    """Evaluate ``model`` over ``data_loader`` and summarise the results.

    Batches are fed to the model exactly as the loader yields them; nothing is
    moved to another device here, so the model and the data must already be
    on the same device.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        data_loader: Iterable of ``(X, y)`` batches that also supports ``len()``.
        loss_fn: Callable invoked as ``loss_fn(y_pred, y)``.
        accuracy_fn: Callable invoked as ``accuracy_fn(y_true=..., y_pred=...)``
            with the argmax over dim 1 of the model output as ``y_pred``.

    Returns:
        Dict with the model's class name, the per-batch mean loss and the
        per-batch mean accuracy.
    """
    running_loss = 0
    running_acc = 0
    model.eval()
    with torch.inference_mode():
        for features, targets in data_loader:
            outputs = model(features)
            running_loss += loss_fn(outputs, targets)
            running_acc += accuracy_fn(y_true=targets,
                                       y_pred=outputs.argmax(dim=1))

        # Average the accumulated totals over the number of batches.
        num_batches = len(data_loader)
        running_loss /= num_batches
        running_acc /= num_batches

    # Return value: dictionary holding the model's results on data_loader.
    summary = {"model_name": model.__class__.__name__,
               "model_loss": running_loss.item(),
               "model_acc": running_acc}
    return summary
# Evaluate model_0 on the test batches and show the resulting dictionary.
model_0_results = eval_model(
    model=model_0,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
)
print(model_0_results)

{'model_name': 'FashionMNISTModelV0', 'model_loss': 2.3190648555755615, 'model_acc': 10.852635782747603}


In [5]:
# setup device agnostic-code
import torch
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

cuda


In [6]:
# model 1: Building a better model with non-linearity

# non-linear and linear layers
class FashionMNISTModelV1(nn.Module):
    """Classifier made of Flatten -> Linear -> ReLU -> Linear -> ReLU.

    Args:
        input_shape: Number of input features of the first linear layer.
        hidden_units: Number of output features of the first linear layer.
        output_shape: Number of output features of the second linear layer.
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        # Kept positional so parameters stay addressable as ``layer_stack.<index>.*``.
        layers = [
            nn.Flatten(),
            nn.Linear(in_features=input_shape, out_features=hidden_units),
            nn.ReLU(),  # non-linearity between the two linear layers
            nn.Linear(in_features=hidden_units, out_features=output_shape),
            # NOTE(review): this ReLU is applied to the final outputs, so the
            # values handed to the loss are never negative - confirm this is intended.
            nn.ReLU(),
        ]
        self.layer_stack = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor):
        """Run ``x`` through ``layer_stack`` and return the result."""
        output = self.layer_stack(x)
        return output

In [7]:
# Seed first so the freshly initialised weights are reproducible.
torch.manual_seed(42)
model_1 = FashionMNISTModelV1(
    input_shape=784,
    hidden_units=10,
    output_shape=len(class_names),
).to(device)
# Check which device the parameters are on (printed, then shown as the cell result).
print(next(model_1.parameters()).device)
next(model_1.parameters()).device

cuda:0


device(type='cuda', index=0)

In [8]:
# Setup loss, optimizer and evaluation metrics

from helper_functions import accuracy_fn
# Rebind loss_fn and optimizer; the SGD optimizer now updates model_1's parameters.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_1.parameters(), lr=0.1)

In [9]:
# Functionizing training and test loops
def train_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               accuracy_fn,
               device: torch.device = device):
    """Train ``model`` for one pass over ``data_loader`` and print the mean metrics.

    Args:
        model: Module to train; it is moved to ``device`` and put in train mode.
        data_loader: Iterable of ``(X, y)`` batches that also supports ``len()``.
        loss_fn: Callable invoked as ``loss_fn(y_pred, y)``; its result is
            back-propagated.
        optimizer: Optimizer stepped once per batch.
        accuracy_fn: Callable invoked as ``accuracy_fn(y_true=..., y_pred=...)``
            with the argmax over dim 1 of the model output as ``y_pred``.
        device: Device the model and every batch are moved to.

    Returns:
        None. The per-batch mean loss and accuracy are printed.
    """
    train_loss, train_acc = 0.0, 0.0
    model.to(device)
    # test_step() switches the model to eval mode, so make sure each training
    # pass explicitly runs in train mode again.
    model.train()
    for X, y in data_loader:
        X, y = X.to(device), y.to(device)
        y_pred = model(X)
        loss = loss_fn(y_pred, y)
        # Accumulate a plain Python float: adding the loss tensor itself would
        # keep every batch's autograd history referenced for the whole epoch.
        train_loss += loss.item()
        train_acc += accuracy_fn(y_true=y, y_pred=y_pred.argmax(dim=1))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Average the accumulated totals over the number of batches.
    train_loss /= len(data_loader)
    train_acc /= len(data_loader)
    print(f"Train loss: {train_loss:.5f} | Train accuracy: {train_acc:.2f}%")

def test_step(data_loader: torch.utils.data.DataLoader,
              model: torch.nn.Module,
              loss_fn: torch.nn.Module,
              accuracy_fn,
              device: torch.device = device):
    """Evaluate ``model`` over ``data_loader`` and print the mean metrics.

    Args:
        data_loader: Iterable of ``(X, y)`` batches that also supports ``len()``.
        model: Module to evaluate; it is moved to ``device`` and put in eval mode.
        loss_fn: Callable invoked as ``loss_fn(test_pred, y)``.
        accuracy_fn: Callable invoked as ``accuracy_fn(y_true=..., y_pred=...)``
            with the argmax over dim 1 of the model output as ``y_pred``.
        device: Device the model and every batch are moved to.

    Returns:
        None. The per-batch mean loss and accuracy are printed.
    """
    test_loss = 0
    test_acc = 0
    model.to(device)
    model.eval()
    with torch.inference_mode():
        for features, targets in data_loader:
            features = features.to(device)
            targets = targets.to(device)

            outputs = model(features)

            test_loss += loss_fn(outputs, targets)
            test_acc += accuracy_fn(y_true=targets, y_pred=outputs.argmax(dim=1))

        # Average the accumulated totals over the number of batches.
        num_batches = len(data_loader)
        test_loss /= num_batches
        test_acc /= num_batches
        print(f"Test loss: {test_loss:.5f} | Test accuracy: {test_acc:.2f}%\n")

In [10]:
# Seed before training starts.
torch.manual_seed(42)

from tqdm.auto import tqdm
from timeit import default_timer as timer
# Start the timer right before the epoch loop so only training/testing is measured.
train_time_start_on_gpu = timer()

epochs = 3
# Each epoch runs one training pass followed by one evaluation pass on the test data.
for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n--------------------")
    train_step(data_loader= train_dataloader,
               model=model_1,
               loss_fn=loss_fn,
               optimizer=optimizer,
               accuracy_fn=accuracy_fn)
    test_step(data_loader=test_dataloader,
              model=model_1,
              loss_fn=loss_fn,
              accuracy_fn=accuracy_fn)
# Stop the timer and report the elapsed time together with the device name.
train_time_end_on_gpu = timer()
total_train_time_model_1 = print_train_time(start=train_time_start_on_gpu,
                                            end=train_time_end_on_gpu,
                                            device=device)

  from .autonotebook import tqdm as notebook_tqdm
  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0
--------------------
Train loss: 1.09199 | Train accuracy: 61.34%


 33%|███▎      | 1/3 [00:12<00:25, 12.54s/it]

Test loss: 0.95636 | Test accuracy: 65.00%

Epoch: 1
--------------------
Train loss: 0.78101 | Train accuracy: 71.93%


 67%|██████▋   | 2/3 [00:25<00:12, 12.54s/it]

Test loss: 0.72227 | Test accuracy: 73.91%

Epoch: 2
--------------------
Train loss: 0.67027 | Train accuracy: 75.94%


100%|██████████| 3/3 [00:37<00:00, 12.55s/it]

Test loss: 0.68500 | Test accuracy: 75.02%

Train time on cuda : 37.642 seconds





There's a small bottleneck (병목현상) when copying data from CPU memory (the default) to GPU memory.
So for smaller models and datasets, the CPU might actually be the optimal place to compute on.

데이터셋과 모델이 작은 경우에 GPU로 데이터를 옮기는 데 드는 시간이 GPU의 속도 이점보다 더 커서 오히려 느려질 수 있음
데이터셋과 모델이 클 경우, GPU의 연산 속도 이점이 전송 비용(the cost of getting the data there(GPU))보다 커짐

# 병목 현상 : 시스템 전체 성능, 처리 속도, 효율 등을 제한하는 가장 느린 요소 또는 구간

In [11]:
torch.manual_seed(42)
# This call raises a RuntimeError: the data and the model were set up with
# device-agnostic code, but this evaluation function was not, so it never moves
# the batches to the model's device.
model_1_results = eval_model(model = model_1,
                             data_loader = test_dataloader,
                             loss_fn=loss_fn,
                             accuracy_fn=accuracy_fn)
model_1_results

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat1 in method wrapper_CUDA_addmm)

In [12]:
import torch.utils.data.dataloader

torch.manual_seed(42)
def eval_model(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               accuracy_fn,
               device: torch.device = device):
    """Evaluate ``model`` over ``data_loader`` on ``device`` and summarise the results.

    Args:
        model: Module to evaluate; it is moved to ``device`` and put in eval mode.
        data_loader: Iterable of ``(X, y)`` batches that also supports ``len()``.
        loss_fn: Callable invoked as ``loss_fn(y_pred, y)``.
        accuracy_fn: Callable invoked as ``accuracy_fn(y_true=..., y_pred=...)``
            with the argmax over dim 1 of the model output as ``y_pred``.
        device: Device the model and every batch are moved to.

    Returns:
        Dict with the model's class name (``"model_name"``), the per-batch mean
        loss (``"model_loss"``) and the per-batch mean accuracy (``"model_acc"``).
    """
    loss, acc = 0, 0
    # Keep the model on the same device as the batches, as train_step() and
    # test_step() do, so a model left on another device does not cause a mismatch.
    model.to(device)
    model.eval()
    with torch.inference_mode():
        for X, y in data_loader:
            X, y = X.to(device), y.to(device)
            y_pred = model(X)
            loss += loss_fn(y_pred, y)
            acc += accuracy_fn(y_true=y, y_pred=y_pred.argmax(dim=1))

        # Average the accumulated totals over the number of batches.
        loss /= len(data_loader)
        acc /= len(data_loader)
    return {"model_name": model.__class__.__name__,
            "model_loss": loss.item(),
            "model_acc": acc}

# Evaluate model_1 on the test batches, on the selected device.
model_1_results = eval_model(
    model=model_1,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
    device=device,
)
# Print both result dictionaries one after the other for comparison.
print(model_1_results)

print(model_0_results)

{'model_name': 'FashionMNISTModelV1', 'model_loss': 0.6850008368492126, 'model_acc': 75.01996805111821}
{'model_name': 'FashionMNISTModelV0', 'model_loss': 2.3190648555755615, 'model_acc': 10.852635782747603}


Two of the main ways to fix overfitting (과적합 해결 주요방법 2가지)
1. Using a smaller or different model (some models fit certain kinds of data better than others) (데이터 종류에 따라 더 잘 맞는 모델이 있음)
2. Using a larger dataset (the more data, the more chance a model has to learn generalizable patterns) (데이터가 많을수록 모델이 일반화 가능한 패턴을 학습할 기회도 많아짐)

# Model 2: Building a Convolutional Neural Network(CNN)

CNNs are known for their ability to find patterns in visual data

Using CNN model : TinyVGG from the CNN Explainer website
structure : Input layer -> [convolutional layer -> activation layer -> pooling layer] -> Output layer
 + [convolutional layer -> activation layer -> pooling layer]은 필요에 따라 확장하거나 반복 가능

- 문제 유형에 따라 사용하는 모델 (예외도 존재함)
1. Structured data(Excel spreadsheets, row and column data) | Model : Gradient boosted models, Random Forests, XGBoost | Code example : sklearn.ensemble, XGBoost library
2. Unstructured data(images, audio, language) | Model : Convolutional Neural Networks, Transformers | Code example : torchvision.models, HuggingFace Transformers


## CNN 모델
1. 입력 이미지와 특징 추출

2. 합성곱(Convolution) 연산
  + Kernel : 작은 크기의 가중치 행렬, 이 커널이 입력 이미지 위를 슬라이딩하며 점진적으로 내적 연산(Dot product) 수행
  + Dot product(내적 연산) : 커널 행렬과 입력 이미지의 해당 영역 픽셀값을 곱해서 모두 더함, 계산된 값이 feature map의 한 위치에 대응됨
  + 특징 추출 : 커널은 엣지(선), 색상 변화, 패턴 등을 감지하도록 학습되어, 이미지 내 특정 특징을 잡아냄
  + Stride(걸음걸이) : Kernel이 이미지 위를 이동하는 간격 (보통 1 또는 2 픽셀 단위로 움직이며, stride가 크면 출력 크기가 작아짐)
  + Padding (패딩) : 입력 이미지 주변에 0 또는 특정 값을 덧붙여서 경계 부분에서 손실을 줄임

3. Activation Function (활성화 함수) : Convolution 결과에 바로 적용되는 함수, (ReLU의 경우) 음수 값을 0으로 만들고 양수는 그대로 둠 (비선형성 부여)

4. Pooling layer : 일정 크기의 영역 안에서 가장 큰 값 선택
  + 목적 : 공간 차원 축소(height, width 감소), 잡음과 작은 변동에 대한 강인성 증가, 연산 효율 향상

5. 다중 레이어 구조 : convolution -> activation function -> pooling layer를 여러 층 쌓아올림

6. Flatten layer(1차원 변환) : 다중 채널과 2D 공간으로 된 feature map을 1차원 벡터로 펼침 (해당 벡터가 이후의 Fully Connected Layer에 입력)

7. Fully Connected Layer(완전연결층) : CNN의 마지막 단계는 보통 완전연결층, 모든 입력 뉴런과 출력 뉴런이 연결된 구조 (Flatten된 특징 벡터를 입력받아 클래스별 logits을 출력)

8. Softmax function : logits를 확률 값으로 변환 (이 확률이 각 클래스에 속할 확률을 의미)

9. Prediction : 가장 높은 확률을 가진 클래스를 최종 예측값으로 선택

10. 하이퍼파라미터 조정 : Kernel size, Stride size, Padding size를 조절하여 출력 크기와 특징맵에 미치는 영향이 확인 가능함

In [14]:
# nn.Conv2d() and nn.MaxPool2d() layers from torch.nn
from torch import nn

class FashionMNISTModelV2(nn.Module):
    """Small CNN: two convolutional blocks followed by a linear classifier.

    Each block is Conv2d -> ReLU -> Conv2d -> ReLU -> MaxPool2d.

    Args:
        input_shape: ``in_channels`` of the first convolution.
        hidden_units: Number of channels produced by every convolution.
        output_shape: Number of output features of the classifier.
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        self.block_1 = self._conv_block(in_channels=input_shape,
                                        out_channels=hidden_units)
        self.block_2 = self._conv_block(in_channels=hidden_units,
                                        out_channels=hidden_units)
        # Where does hidden_units*7*7 come from? Every layer compresses or
        # reshapes its input: the 3x3 convolutions with padding=1 keep height
        # and width, and each 2x2 max-pool halves them, so a 28x28 input
        # reaches the classifier as 7x7 per channel (28 -> 14 -> 7).
        flattened_features = hidden_units * 7 * 7
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=flattened_features, out_features=output_shape),
        )

    @staticmethod
    def _conv_block(in_channels: int, out_channels: int) -> nn.Sequential:
        """Build one Conv2d -> ReLU -> Conv2d -> ReLU -> MaxPool2d block."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels,
                      out_channels=out_channels,
                      kernel_size=3,  # apply a 3*3 filter (kernel) to the image
                      stride=1,       # move the kernel one step at a time (the default)
                      padding=1),     # options: "valid" (no padding), "same" (output has same shape as input) or an int
            nn.ReLU(),
            nn.Conv2d(in_channels=out_channels,
                      out_channels=out_channels,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,
                         stride=2),   # the default stride is the same as kernel_size
        )

    def forward(self, x: torch.Tensor):
        """Run ``x`` through both blocks and the classifier, in that order."""
        for stage in (self.block_1, self.block_2, self.classifier):
            x = stage(x)
        return x

# Seed first so the freshly initialised weights are reproducible.
torch.manual_seed(42)
model_2 = FashionMNISTModelV2(
    input_shape=1,  # passed to the first Conv2d as in_channels
    hidden_units=10,
    output_shape=len(class_names),
).to(device)
print(model_2)

# What we've done is a common practice in machine learning. Find a model architecture somewhere and replicate with code

# nn.Conv2d(), also known as a convolutional layer 
# nn.MaxPool2d(), also known as a max pooling layer

# For other dimensional data (such as 1D for text or 3D for 3D objects) there's also nn.Conv1d() and nn.Conv3d()

FashionMNISTModelV2(
  (block_1): Sequential(
    (0): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block_2): Sequential(
    (0): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=490, out_features=10, bias=True)
  )
)


In [15]:
torch.manual_seed(42)

# Random batch shaped [batch_size, color_channels, height, width]; the first
# entry is used as a single test image.
images = torch.randn(32, 3, 64, 64)
test_image = images[0]
print(f"Image batch shape: {images.shape} -> [batch_size, color_channels, height, width]")
print(f"Single image shape: {test_image.shape} -> [color_channels, height, width]")
print(f"Single image pixel values:\n{test_image}")

Image batch shape: torch.Size([32, 3, 64, 64]) -> [batch_size, color_channels, height, width]
Single image shape: torch.Size([3, 64, 64]) -> [color_channels, height, width]
Single image pixel values:
tensor([[[ 1.9269,  1.4873,  0.9007,  ...,  1.8446, -1.1845,  1.3835],
         [ 1.4451,  0.8564,  2.2181,  ...,  0.3399,  0.7200,  0.4114],
         [ 1.9312,  1.0119, -1.4364,  ..., -0.5558,  0.7043,  0.7099],
         ...,
         [-0.5610, -0.4830,  0.4770,  ..., -0.2713, -0.9537, -0.6737],
         [ 0.3076, -0.1277,  0.0366,  ..., -2.0060,  0.2824, -0.8111],
         [-1.5486,  0.0485, -0.7712,  ..., -0.1403,  0.9416, -0.0118]],

        [[-0.5197,  1.8524,  1.8365,  ...,  0.8935, -1.5114, -0.8515],
         [ 2.0818,  1.0677, -1.4277,  ...,  1.6612, -2.6223, -0.4319],
         [-0.1010, -0.4388, -1.9775,  ...,  0.2106,  0.2536, -0.7318],
         ...,
         [ 0.2779,  0.7342, -0.3736,  ..., -0.4601,  0.1815,  0.1850],
         [ 0.7205, -0.2833,  0.0937,  ..., -0.1002, -2.3609,

In [None]:
# in_channels (int) - Number of channels in the input image.
# out_channels (int) - Number of channels produced by the convolution. (합성곱 연산을 통해 출력될 채널 수)
# kernel_size (int or tuple) - Size of the convolving kernel/filter.
# stride (int or tuple, optional) - How big of a step the convolving kernel takes at a time. Default: 1.
# padding (int, tuple, str) - Padding added to all four sides of input. Default: 0. (입력의 네 가장자리에 추가되는 픽셀 수)

torch.manual_seed(42)

# padding also accepts the strings "valid" (no padding) and "same"
# (the output keeps the same size as the input).
conv_layer = nn.Conv2d(
    in_channels=3,
    out_channels=10,
    kernel_size=3,
    stride=1,
    padding=0,
)

# Pass the data through the convolutional layer
print(conv_layer(test_image))

# Note: on PyTorch versions before 1.11.0 the call above raises a shape-mismatch error (on PyTorch 1.11.0+ it won't occur),
# because nn.Conv2d() expects a 4d tensor as input:
# a 4-dimensional tensor with size (N, C, H, W), i.e. [batch_size, color_channels, height, width]


tensor([[[ 1.5396,  0.0516,  0.6454,  ..., -0.3673,  0.8711,  0.4256],
         [ 0.3662,  1.0114, -0.5997,  ...,  0.8983,  0.2809, -0.2741],
         [ 1.2664, -1.4054,  0.3727,  ..., -0.3409,  1.2191, -0.0463],
         ...,
         [-0.1541,  0.5132, -0.3624,  ..., -0.2360, -0.4609, -0.0035],
         [ 0.2981, -0.2432,  1.5012,  ..., -0.6289, -0.7283, -0.5767],
         [-0.0386, -0.0781, -0.0388,  ...,  0.2842,  0.4228, -0.1802]],

        [[-0.2840, -0.0319, -0.4455,  ..., -0.7956,  1.5599, -1.2449],
         [ 0.2753, -0.1262, -0.6541,  ..., -0.2211,  0.1999, -0.8856],
         [-0.5404, -1.5489,  0.0249,  ..., -0.5932, -1.0913, -0.3849],
         ...,
         [ 0.3870, -0.4064, -0.8236,  ...,  0.1734, -0.4330, -0.4951],
         [-0.1984, -0.6386,  1.0263,  ..., -0.9401, -0.0585, -0.7833],
         [-0.6306, -0.2052, -0.3694,  ..., -1.3248,  0.2456, -0.7134]],

        [[ 0.4414,  0.5100,  0.4846,  ..., -0.8484,  0.2638,  1.1258],
         [ 0.8117,  0.3191, -0.0157,  ...,  1

In [18]:
# Add a leading batch dimension, then compare the shapes before and after the conv layer.
print(test_image.unsqueeze(0).shape)
print(conv_layer(test_image.unsqueeze(0)).shape)

torch.Size([1, 3, 64, 64])
torch.Size([1, 10, 62, 62])


In [None]:
torch.manual_seed(42)
conv_layer_2 = nn.Conv2d(
    in_channels=3,       # same number of color channels as our input image
    out_channels=10,
    kernel_size=(5, 5),  # kernels are usually square, but a tuple works too (the kernel does not have to be square)
    stride=2,
    padding=0,
)

# Pass single image through new conv_layer_2 (this calls nn.Conv2d()'s forward() method on the input)
print(conv_layer_2(test_image.unsqueeze(dim=0)).shape)

# Behind the scenes, nn.Conv2d() is compressing the information stored in the image
# It does this by performing operations on the input (test image) against its internal parameters
# 지금까지 만들어온 신경망과 차이점은 레이어마다 파라미터 업데이트를 계산하는 방식이 다름, 각 레이어의 forward() 메서드에 정의된 연산 방식이 다름
# (The only difference is how the different layers calculate their parameter updates or, in PyTorch terms, the operation present in the layer's forward() method)

torch.Size([1, 10, 30, 30])


In [None]:
# Check out conv_layer_2's internal parameters (the entries of its state_dict).
print(conv_layer_2.state_dict()) # check out the conv_layer_2 internal parameters

OrderedDict([('weight', tensor([[[[ 0.0883,  0.0958, -0.0271,  0.1061, -0.0253],
          [ 0.0233, -0.0562,  0.0678,  0.1018, -0.0847],
          [ 0.1004,  0.0216,  0.0853,  0.0156,  0.0557],
          [-0.0163,  0.0890,  0.0171, -0.0539,  0.0294],
          [-0.0532, -0.0135, -0.0469,  0.0766, -0.0911]],

         [[-0.0532, -0.0326, -0.0694,  0.0109, -0.1140],
          [ 0.1043, -0.0981,  0.0891,  0.0192, -0.0375],
          [ 0.0714,  0.0180,  0.0933,  0.0126, -0.0364],
          [ 0.0310, -0.0313,  0.0486,  0.1031,  0.0667],
          [-0.0505,  0.0667,  0.0207,  0.0586, -0.0704]],

         [[-0.1143, -0.0446, -0.0886,  0.0947,  0.0333],
          [ 0.0478,  0.0365, -0.0020,  0.0904, -0.0820],
          [ 0.0073, -0.0788,  0.0356, -0.0398,  0.0354],
          [-0.0241,  0.0958, -0.0684, -0.0689, -0.0689],
          [ 0.1039,  0.0385,  0.1111, -0.0953, -0.1145]]],


        [[[-0.0903, -0.0777,  0.0468,  0.0413,  0.0959],
          [-0.0596, -0.0787,  0.0613, -0.0467,  0.0701],

In [26]:
# Show the shapes of conv_layer_2's weight and bias tensors next to what each dimension stands for.
print(f"conv_layer_2 weight shape: \n{conv_layer_2.weight.shape} -> [out_channels=10, in_channels=3, kernel_size=5, kernel_size=5]")
print(f"\nconv_layer_2 bias shape: \n{conv_layer_2.bias.shape} -> [out_channels =10]")

conv_layer_2 weight shape: 
torch.Size([10, 3, 5, 5]) -> [out_channels=10, in_channels=3, kernel_size=5, kernel_size=5]

conv_layer_2 bias shape: 
torch.Size([10]) -> [out_channels =10]


In [28]:
# Stepping through nn.MaxPool2d()

# Max-pool layer with a 2x2 window (stride defaults to the kernel size).
max_pool_layer = nn.MaxPool2d(2)

print(f"Test image original shape: {test_image.shape}")
print(f"Test image with unsqueezed dimension: {test_image.unsqueeze(dim=0).shape}")

# Step 1: the batched image goes through the convolutional layer alone.
test_image_through_conv = conv_layer(test_image.unsqueeze(dim=0))
print(f"Shape after going through conv_layer(): {test_image_through_conv.shape}")

# Step 2: the convolution output then goes through the max-pool layer.
test_image_through_conv_and_max_pool = max_pool_layer(test_image_through_conv)
print(f"Shape after going through conv_layer() and max_pool_layer(): {test_image_through_conv_and_max_pool.shape}")

Test image original shape: torch.Size([3, 64, 64])
Test image with unsqueezed dimension: torch.Size([1, 3, 64, 64])
Shape after going through conv_layer(): torch.Size([1, 10, 62, 62])
Shape after going through conv_layer() and max_pool_layer(): torch.Size([1, 10, 31, 31])


nn.Conv2d, nn.MaxPool2d 차이점

1. nn.Conv2d <- 가중치와 편향을 학습, 커널이 입력 이미지의 각 영역과 가중치 기반 연산 수행
2. nn.MaxPool2d <- 지정된 영역의 최댓값만 추출, 고정된 연산, 학습 없음