### 가중치 초기화
* pytorch의 nn.Linear와 nn.Conv2d는 기본적으로 He(Kaiming) Uniform 기반으로 weight를 초기화하며, 초기 weight 값의 범위는 -1/sqrt(fan_in) ~ 1/sqrt(fan_in)
* 가중치 초기화는 nn.init.kaiming_uniform_(), nn.init.kaiming_normal_()등을 이용

In [None]:
import torch
import torch.nn as nn
import math

# Fix the RNG seed so the sampled initial weights are identical on every run.
torch.manual_seed(2025)

linear_01 = nn.Linear(12, 6)
weight_min = linear_01.weight.min().item()
weight_max = linear_01.weight.max().item()
print(f'weight boundary: {weight_min} ~ {weight_max}')

# Compare the observed range with 1/sqrt(fan_in); for this Linear layer
# fan_in is taken from its in_features.
fan_in = linear_01.in_features
bound = 1 / math.sqrt(fan_in)
print('bound:', bound)

In [None]:
# Fix the RNG seed so the sampled initial weights are identical on every run.
torch.manual_seed(2025)

conv_01 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=3)
print(f'weight boundary: {conv_01.weight.min().item()} ~ {conv_01.weight.max().item()}')

# The expected weight bound is 1/sqrt(fan_in), where for Conv2d
# fan_in = in_channels * kernel_height * kernel_width.
# The kernel size is read from the layer itself so this stays correct if the
# kernel_size passed above is changed.
kernel_height, kernel_width = conv_01.kernel_size
fan_in = conv_01.in_channels * kernel_height * kernel_width
bound = 1 / math.sqrt(fan_in)
print('bound:', bound)

In [None]:
class SimpleCNN_01(nn.Module):
    """Small CNN classifier whose Conv2d weights are re-initialized with Kaiming normal.

    Architecture: conv(3->32) -> ReLU -> conv(32->64) -> ReLU -> 2x2 max pool
    -> flatten -> linear. The classifier's in_features of 12544 (= 64 * 14 * 14)
    corresponds to 3x32x32 inputs: two unpadded 3x3 convolutions give 28x28 and
    the pooling halves that to 14x14.
    """

    def __init__(self, num_classes=10):
        """Create the layers and apply Kaiming-normal init to every Conv2d weight.

        Args:
            num_classes: number of output logits of the final linear layer.
        """
        super().__init__()
        self.conv_1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1)
        self.conv_2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.flatten = nn.Flatten()
        self.classifier = nn.Linear(in_features=12544, out_features=num_classes)

        # Only Conv2d weights are overridden here; the Linear layer and all
        # biases keep the initialization they received at construction.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                print('kaiming normal initialization applied')
                nn.init.kaiming_normal_(m.weight, mode="fan_in", nonlinearity="relu")

    def forward(self, x):
        """Return the class logits for a batch of images `x`."""
        # ReLU is reached through the already-imported `nn` so this method does
        # not depend on an `F` alias that is imported elsewhere in the file.
        x = nn.functional.relu(self.conv_1(x))
        x = nn.functional.relu(self.conv_2(x))
        x = self.pool(x)
        x = self.flatten(x)
        x = self.classifier(x)

        return x

In [None]:
# Instantiate the model; the constructor prints one message for each Conv2d
# layer whose weight it re-initializes.
simple_cnn_01 = SimpleCNN_01(num_classes=10)

### Batch Normalization 적용
* Linear Layer 이후 적용 시는 BatchNorm1d(num_features) 를 적용. num_features는 Linear Layer의 out_features와 동일
* Conv2d Layer 이후 적용 시는 BatchNorm2d(num_features) 를 적용. num_features는 Conv2d의 out_channels와 동일
* 기존 Network 모델의 Conv -> Activation을 Conv -> BN -> Activation 으로 적용

#### CIFAR 10 Dataset 및 DataLoader 생성, Trainer 클래스 생성

In [None]:
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
from torch.utils.data import random_split

# Of the 60,000 images in total, 50,000 form the training set (split again
# below into training and validation parts) and 10,000 form the test set.
cifar_options = {'root': './data', 'download': True, 'transform': ToTensor()}
train_dataset = CIFAR10(train=True, **cifar_options)
test_dataset = CIFAR10(train=False, **cifar_options)

# 85% of the training set is kept for training, the remainder for validation.
num_train_images = len(train_dataset)
tr_size = int(0.85 * num_train_images)
val_size = num_train_images - tr_size
tr_dataset, val_dataset = random_split(train_dataset, [tr_size, val_size])
print('tr:', len(tr_dataset), 'valid:', len(val_dataset))

# Only the training loader shuffles; all loaders share batch size and workers.
loader_options = {'batch_size': 32, 'num_workers': 4}
tr_loader = DataLoader(tr_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

In [None]:
from tqdm import tqdm
import torch.nn.functional as F

class Trainer:
    """Epoch-based training and validation loop for a classification model.

    Each epoch reports a running average loss and a running accuracy on a tqdm
    progress bar; `fit` collects the per-epoch results in a history dict.
    """

    def __init__(self, model, loss_fn, optimizer, train_loader, val_loader, device=None):
        """Store the training components and move the model to `device`.

        Args:
            model: network to train; moved with ``model.to(device)``.
            loss_fn: callable invoked as ``loss_fn(outputs, targets)``.
            optimizer: optimizer driven through ``zero_grad()`` and ``step()``.
            train_loader: iterable of ``(inputs, targets)`` batches supporting ``len()``.
            val_loader: validation batches in the same form, or a falsy value
                to skip validation.
            device: device passed to ``.to()`` for the model and every batch.
        """
        self.model = model.to(device)
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.device = device

    def train_epoch(self, epoch):
        """Train for one pass over `train_loader`.

        Args:
            epoch: zero-based epoch index, used only for the progress-bar label.

        Returns:
            Tuple ``(running_avg_loss, accuracy)`` after the last batch.
        """
        self.model.train()

        # Accumulators for the running average loss.
        accu_loss = 0.0
        running_avg_loss = 0.0
        # Accumulators for accuracy: samples seen and correct predictions so far.
        num_total = 0.0
        accu_num_correct = 0.0
        accuracy = 0.0

        # tqdm shows the progress of the training loop in real time.
        with tqdm(total=len(self.train_loader), desc=f"Epoch {epoch+1} [Training..]", leave=True) as progress_bar:
            for batch_idx, (inputs, targets) in enumerate(self.train_loader):
                # Use the device stored on the instance, not a global `device`.
                inputs = inputs.to(self.device)
                targets = targets.to(self.device)

                # Forward pass
                outputs = self.model(inputs)
                loss = self.loss_fn(outputs, targets)

                # Backward pass
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                # Running average loss = accumulated batch losses / batches so far.
                accu_loss += loss.item()
                running_avg_loss = accu_loss /(batch_idx + 1)

                # Accuracy: count predictions (argmax over the last dim) that
                # match the targets, then divide the accumulated correct count
                # by the accumulated sample count.
                num_correct = (outputs.argmax(-1) == targets).sum().item()
                num_total += inputs.shape[0]
                accu_num_correct += num_correct
                accuracy = accu_num_correct / num_total

                # Refresh the displayed metrics every 20 batches and on the last batch.
                progress_bar.update(1)
                if batch_idx % 20 == 0 or (batch_idx + 1) == progress_bar.total:
                    progress_bar.set_postfix({"Loss": running_avg_loss,
                                              "Accuracy": accuracy})

        return running_avg_loss, accuracy

    def validate_epoch(self, epoch):
        """Evaluate the model on `val_loader` without gradient tracking.

        Args:
            epoch: zero-based epoch index, used only for the progress-bar label.

        Returns:
            Tuple ``(running_avg_loss, accuracy)``, or ``(None, None)`` when
            no validation loader is available.
        """
        if not self.val_loader:
            # fit() unpacks the result into two names, so the "no validation"
            # case must be a pair as well; a bare None would raise TypeError.
            return None, None

        self.model.eval()

        # Accumulators for the running average loss.
        accu_loss = 0
        running_avg_loss = 0
        # Accumulators for accuracy: samples seen and correct predictions so far.
        num_total = 0.0
        accu_num_correct = 0.0
        accuracy = 0.0
        with tqdm(total=len(self.val_loader), desc=f"Epoch {epoch+1} [Validating]", leave=True) as progress_bar:
            with torch.no_grad():
                for batch_idx, (inputs, targets) in enumerate(self.val_loader):
                    inputs = inputs.to(self.device)
                    targets = targets.to(self.device)

                    outputs = self.model(inputs)

                    loss = self.loss_fn(outputs, targets)
                    # Running average loss = accumulated batch losses / batches so far.
                    accu_loss += loss.item()
                    running_avg_loss = accu_loss /(batch_idx + 1)

                    # Accuracy from accumulated correct count and sample count.
                    num_correct = (outputs.argmax(-1) == targets).sum().item()
                    num_total += inputs.shape[0]
                    accu_num_correct += num_correct
                    accuracy = accu_num_correct / num_total

                    # Refresh the displayed metrics every 20 batches and on the last batch.
                    progress_bar.update(1)
                    if batch_idx % 20 == 0 or (batch_idx + 1) == progress_bar.total:
                        progress_bar.set_postfix({"Loss": running_avg_loss,
                                                  "Accuracy":accuracy})
        return running_avg_loss, accuracy

    def fit(self, epochs):
        """Run `epochs` rounds of training followed by validation.

        Args:
            epochs: number of epochs to run.

        Returns:
            Dict with keys 'train_loss', 'train_acc', 'val_loss', 'val_acc',
            each a list with one entry per epoch. The validation entries are
            None for epochs where validation was skipped.
        """
        history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}
        for epoch in range(epochs):
            train_loss, train_acc = self.train_epoch(epoch)
            val_loss, val_acc = self.validate_epoch(epoch)
            print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f} Train Accuracy: {train_acc:.4f}",
                  f", Val Loss: {val_loss:.4f} Val Accuracy: {val_acc:.4f}" if val_loss is not None else "")
            # Record this epoch's training and validation results.
            history['train_loss'].append(train_loss); history['train_acc'].append(train_acc)
            history['val_loss'].append(val_loss); history['val_acc'].append(val_acc)

        return history

    def get_trained_model(self):
        """Return the model held by this trainer."""
        return self.model

#### Batch Normalization을 모델에 적용 후 성능 검증
* 기존 Network 모델의 Conv -> Activation을 Conv -> BN -> Activation 으로 적용

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchinfo import summary

NUM_INPUT_CHANNELS = 3


class SimpleCNNWithBN(nn.Module):
    """CNN classifier built from three Conv -> BatchNorm -> ReLU stages.

    Every stage applies two 3x3 convolutions (each followed by BatchNorm2d and
    ReLU) and ends with 2x2 max pooling. Global average pooling and one linear
    layer produce the class logits.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Stage 1: two 3x3 convolutions with 32 filters each.
        stage_1_layers = [
            nn.Conv2d(NUM_INPUT_CHANNELS, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]
        self.conv_block_1 = nn.Sequential(*stage_1_layers)

        # Stage 2: two convolutions with 64 output channels. stride=1 is the
        # default; padding='same' was introduced in version 1.8.
        stage_2_layers = [
            nn.Conv2d(32, 64, kernel_size=3, padding='same'),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding='same'),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]
        self.conv_block_2 = nn.Sequential(*stage_2_layers)

        # Stage 3: two convolutions with 128 filters followed by max pooling.
        # Inside a Sequential container the ReLU activation has to be added as
        # its own layer.
        stage_3_layers = [
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]
        self.conv_block_3 = nn.Sequential(*stage_3_layers)

        # Global average pooling and the final classifier layer.
        head_layers = [
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(128, num_classes),
        ]
        self.classifier_block = nn.Sequential(*head_layers)

    def forward(self, x):
        """Pass `x` through the three conv stages and the classifier head."""
        stages = (self.conv_block_1, self.conv_block_2,
                  self.conv_block_3, self.classifier_block)
        for stage in stages:
            x = stage(x)
        return x


# Instantiate the model and request a torchinfo summary for an input of size
# (1, 3, 32, 32), listing input size, output size and parameter count.
simple_cnn = SimpleCNNWithBN(num_classes=10)

summary(model=simple_cnn, input_size=(1, 3, 32, 32), 
        col_names=['input_size', 'output_size', 'num_params'], 
        row_settings=['var_names'])


#### 반복되는 Sequential Container 부분을 함수화 적용

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchinfo import summary

NUM_INPUT_CHANNELS = 3

class SimpleCNNWithBN(nn.Module):
    """CNN classifier whose repeated Conv-BN-ReLU stages come from one factory method.

    Three stages (32, 64 and 128 channels) are each built by
    `create_convbn_block`; global average pooling and a linear layer produce
    the class logits.
    """

    def __init__(self, num_classes):
        """Build the three conv stages and the classifier head.

        Args:
            num_classes: number of output logits of the final linear layer.
        """
        super().__init__()

        # Stage 1 reads its input channel count from NUM_INPUT_CHANNELS so the
        # constant declared for this purpose is the single source of truth.
        self.conv_block_1 = self.create_convbn_block(first_channels=NUM_INPUT_CHANNELS, middle_channels=32, last_channels=32)

        # Stage 2: two convolutions with 64 output channels.
        self.conv_block_2 = self.create_convbn_block(first_channels=32, middle_channels=64, last_channels=64)

        # Stage 3: two convolutions with 128 filters followed by max pooling.
        self.conv_block_3 = self.create_convbn_block(first_channels=64, middle_channels=128, last_channels=128)

        # Global average pooling and the final classifier layer.
        self.classifier_block = nn.Sequential(
            nn.AdaptiveAvgPool2d(output_size=(1, 1)),
            nn.Flatten(),
            nn.Linear(in_features=128, out_features=num_classes)
        )

    def create_convbn_block(self, first_channels, middle_channels, last_channels):
        """Return a Sequential of (Conv2d -> BatchNorm2d -> ReLU) x 2 -> MaxPool2d.

        Both convolutions use a 3x3 kernel with padding=1 and the default
        stride, so they keep the spatial size; the closing 2x2 max pooling
        halves it.

        Args:
            first_channels: input channels of the first convolution.
            middle_channels: output channels of the first convolution and
                input channels of the second.
            last_channels: output channels of the second convolution.
        """
        conv_bn_block = nn.Sequential(
            nn.Conv2d(in_channels=first_channels, out_channels=middle_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(middle_channels),
            nn.ReLU(),
            nn.Conv2d(in_channels=middle_channels, out_channels=last_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(last_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        return conv_bn_block

    def forward(self, x):
        """Return the class logits for a batch of images `x`."""
        x = self.conv_block_1(x)
        x = self.conv_block_2(x)
        x = self.conv_block_3(x)
        x = self.classifier_block(x)

        return x

# Instantiate the refactored model and request a torchinfo summary for an
# input of size (1, 3, 32, 32), listing input size, output size and
# parameter count.
simple_cnn = SimpleCNNWithBN(num_classes=10)

summary(model=simple_cnn, input_size=(1, 3, 32, 32), 
        col_names=['input_size', 'output_size', 'num_params'], 
        row_settings=['var_names'])


In [None]:
import torch 
import torch.nn as nn
from torch.optim import SGD, Adam

NUM_INPUT_CHANNELS = 3
NUM_CLASSES = 10

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Place the model on its target device before the optimizer is constructed,
# following the torch.optim guidance that optimized parameters should already
# live in their final location when the optimizer is created.
model = SimpleCNNWithBN(num_classes=NUM_CLASSES).to(device)
optimizer = Adam(model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()

trainer = Trainer(model=model, loss_fn=loss_fn, optimizer=optimizer,
       train_loader=tr_loader, val_loader=val_loader, device=device)
# Train and validate for 30 epochs; history holds the per-epoch metrics.
history = trainer.fit(30)

### Predictor 클래스로 예측 적용

In [None]:
class Predictor:
    """Inference helper: accuracy evaluation and class/probability prediction."""

    def __init__(self, model, device):
        self.device = device
        self.model = model.to(device)

    def evaluate(self, loader):
        """Return the accuracy of the model accumulated over all batches of `loader`."""
        # In eval mode the model uses the running statistics (mean, variance)
        # gathered during training instead of the statistics of the current batch.
        self.model.eval()
        running_accuracy = 0.0
        # Samples seen and correct predictions so far.
        seen = 0.0
        hits = 0.0

        with tqdm(total=len(loader), desc="[Evaluating]", leave=True) as progress_bar, torch.no_grad():
            for step, (inputs, targets) in enumerate(loader):
                inputs, targets = inputs.to(self.device), targets.to(self.device)
                logits = self.model(inputs)

                # Update the accumulated counts, then the accuracy derived from them.
                hits += (logits.argmax(-1) == targets).sum().item()
                seen += inputs.shape[0]
                running_accuracy = hits / seen

                progress_bar.update(1)
                is_last_batch = (step + 1) == progress_bar.total
                if step % 20 == 0 or is_last_batch:
                    progress_bar.set_postfix({"Accuracy": running_accuracy})

        return running_accuracy

    def predict_proba(self, inputs):
        """Return softmax probabilities over the last dimension of the model output."""
        self.model.eval()
        with torch.no_grad():
            # Only inputs are needed for prediction; there are no targets here.
            logits = self.model(inputs.to(self.device))
            probabilities = F.softmax(logits, dim=-1)

        return probabilities

    def predict(self, inputs):
        """Return the index of the most probable class for each input."""
        return self.predict_proba(inputs).argmax(dim=-1)

In [None]:
# Fetch the model held by the trainer after fit().
trained_model = trainer.get_trained_model()

# The input data must be normalized the same way as the training data.
# The lines below show how the test dataset and loader would be built.
# test_dataset = CIFAR10(root='./data', train=False, download=True, transform=ToTensor())
# test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

# Measure accuracy on the test loader and print it with four decimals.
predictor = Predictor(model=trained_model, device=device)
eval_metric = predictor.evaluate(test_loader)
print(f'test dataset evaluation:{eval_metric:.4f}')

### Dropout
* Pytorch는 Dropout을 위해 nn.Dropout(p) Layer 제공
* nn.Dropout(p)는 학습(train) 모드에서 지정된 p 확률로 입력 tensor의 element값을 0으로 변경하고, 0으로 변경되지 않은 element들은 1/(1-p) scale factor로 scaling 수행 (eval 모드에서는 입력을 그대로 통과)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Random input to demonstrate dropout on.
input_tensor = torch.randn(4, 10)
print(f"input Tensor:\n{input_tensor}")

# Count the elements that are already zero before dropout is applied.
num_zeros = (input_tensor == 0).sum().item()
print(f"Number of zeros in input_tensor: {num_zeros}")

# Dropout layer with p=0.5, applied to the input.
dropout = nn.Dropout(0.5)
output_tensor = dropout(input_tensor)

# Share of output elements that are zero: zero count / total element count.
total_elements = output_tensor.numel()
num_zeros = (output_tensor == 0).sum().item()
percentage_zeros = (num_zeros / total_elements) * 100

print(f"Output Tensor:\n{output_tensor}")
print(f"Number of zeros in output tensor: {num_zeros}")
print(f"Percentage of zeros: {percentage_zeros:.2f}%")

### Classifier 부분을 Dropout과 Linear로 연결
* Pytorch는 기존 모델(Module)의 서브 모듈만 따로 동적으로 연결하여 모델을 변경할 수 있게 함.
* 기존 모델에서 classification block 부분만 Dropout을 적용 할 수 있도록 모델 구조 변경

In [None]:
NUM_CLASSES = 10

# Build the baseline model; the bare expression on the last line shows its
# current classifier block as the cell output.
simple_cnnbn_base = SimpleCNNWithBN(num_classes=NUM_CLASSES)
simple_cnnbn_base.classifier_block

In [None]:
# Replacement classifier head: flatten -> dropout -> linear -> ReLU -> dropout
# -> linear. in_features is 128*4*4 because the head flattens the full feature
# map instead of global-average-pooling it.
head_layers = [
    nn.Flatten(),
    nn.Dropout(0.5),
    nn.Linear(128*4*4, 300),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(300, 10),
]
do_classifier_block = nn.Sequential(*head_layers)

# Swap the new head into the existing model and show the result.
simple_cnnbn_base.classifier_block = do_classifier_block
print(simple_cnnbn_base.classifier_block)

In [None]:
# Request a torchinfo summary of the model with the replaced classifier block
# for an input of size (1, 3, 32, 32).
summary(model=simple_cnnbn_base, input_size=(1, 3, 32, 32), 
        col_names=['input_size', 'output_size', 'num_params'], 
        row_settings=['var_names'])

In [None]:
def create_do_classifier_block(first_features, second_features, first_dos, second_dos, num_classes=10):
    """Build a classifier head of two linear layers, each preceded by dropout.

    Layer order: Flatten -> Dropout(first_dos) -> Linear(first_features,
    second_features) -> ReLU -> Dropout(second_dos) -> Linear(second_features,
    num_classes).

    Args:
        first_features: in_features of the first linear layer.
        second_features: width of the hidden layer.
        first_dos: dropout probability applied before the first linear layer.
        second_dos: dropout probability applied before the second linear layer.
        num_classes: number of output logits.

    Returns:
        The assembled nn.Sequential.
    """
    layers = [
        nn.Flatten(),
        nn.Dropout(first_dos),
        nn.Linear(first_features, second_features),
        nn.ReLU(),
        nn.Dropout(second_dos),
        nn.Linear(second_features, num_classes),
    ]
    return nn.Sequential(*layers)

# Rebuild the baseline model, replace its classifier block with one produced
# by create_do_classifier_block, then request a torchinfo summary.
simple_cnnbn_base = SimpleCNNWithBN(num_classes=NUM_CLASSES)
do_classifier_block = create_do_classifier_block(first_features=128*4*4, second_features=300,
                                                 first_dos=0.5, second_dos=0.3, num_classes=10)
simple_cnnbn_base.classifier_block = do_classifier_block

summary(model=simple_cnnbn_base, input_size=(1, 3, 32, 32), 
        col_names=['input_size', 'output_size', 'num_params'], 
        row_settings=['var_names'])

In [None]:
import torch 
import torch.nn as nn
from torch.optim import SGD, Adam

NUM_INPUT_CHANNELS = 3
NUM_CLASSES = 10

model = SimpleCNNWithBN(num_classes=NUM_CLASSES)
# The classifier is replaced before the optimizer is created so that the
# optimizer receives the parameters of the new head. The head's output size
# uses NUM_CLASSES so it always matches the model's class count.
do_classifier_block = create_do_classifier_block(first_features=128*4*4, second_features=300,
                                                 first_dos=0.5, second_dos=0.3, num_classes=NUM_CLASSES)
model.classifier_block = do_classifier_block
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Place the model on its target device before the optimizer is constructed,
# following the torch.optim guidance that optimized parameters should already
# live in their final location when the optimizer is created.
model = model.to(device)
optimizer = Adam(model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()

trainer = Trainer(model=model, loss_fn=loss_fn, optimizer=optimizer,
       train_loader=tr_loader, val_loader=val_loader, device=device)
# Train and validate for 30 epochs; history holds the per-epoch metrics.
history = trainer.fit(30)

In [None]:
# Fetch the model held by the trainer after fit().
trained_model = trainer.get_trained_model()

# The input data must be normalized the same way as the training data.
# The lines below show how the test dataset and loader would be built.
# test_dataset = CIFAR10(root='./data', train=False, download=True, transform=ToTensor())
# test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

# Measure accuracy on the test loader and print it with four decimals.
predictor = Predictor(model=trained_model, device=device)
eval_metric = predictor.evaluate(test_loader)
print(f'test dataset evaluation:{eval_metric:.4f}')