# 논문 속 데이터 전처리 요약

- **CIFAR-10 (C10) 및 CIFAR-100 (C100)**
  - *이미지 크기*: 32×32 컬러 자연 이미지
  - *클래스 수*: CIFAR-10은 10가지, CIFAR-100은 100가지 클래스
  - *데이터셋 크기*:
    - CIFAR-10
      - 훈련 세트: 50,000 이미지
      - 테스트 세트: 10,000 이미지
    - CIFAR-100
      - 훈련 세트: 50,000 이미지
      - 테스트 세트: 10,000 이미지
  - *검증 세트*: 훈련 세트에서 5,000 이미지를 홀드 아웃
  - *데이터 증강*: 미러링/쉬프팅 (C10+, C100+ 표시에 사용)
  - *전처리*: 채널 평균 및 표준 편차로 데이터 정규화
  - *최종 런*: 모든 50,000 훈련 이미지를 사용하고 최종 테스트 오류를 보고


# 논문 속 모델 특징

- **전통적인 Convolutional Networks:**
  - 레이어 $\ell$의 $H_{\ell}(x_{\ell-1})$ 결과는 다음 $\ell+1$ 레이어의 입력.
  - $ x_{\ell} = H_{\ell}(x_{\ell-1}) $.
- **ResNet**:
  - 전통적인 네트워크 레이어 연결에 '스킵 연결' 추가.
  - 공식: $ x_{\ell} = H_{\ell}(x_{\ell-1}) + x_{\ell-1} $
  - 장점: 그래디언트가 신호를 쉽게 통과하여 역전파 과정에서 정보 손실을 줄임.
- **DenseNet**:
  - 각 레이어는 모든 이전 레이어의 feature-map을 입력으로 사용.
  - 공식: $ x_{\ell} = H_{\ell}([x_0, x_1, \ldots, x_{\ell-1}]) $
  - 이 연결 방식 덕분에 각 레이어는 네트워크의 '집합적 지식'에 접근 가능.
  - 장점:
    - 그래디언트 소실 문제 해소
    - feature 전파 강화
    - feature 재사용 권장
    - 파라미터 수 감소
- **DenseNet의 추가 기능**:
  - **Bottleneck Layers**: 1x1 convolution으로 입력 feature-map 수 줄임.
  - **Compression**: 네트워크 압축을 통해 모델 효율성 향상.


# Data Preparing

In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import datasets
from torch.utils.data import DataLoader

In [2]:
transform = transforms.Compose(
    [
        # transforms.Resize(224),
        # transforms.RandomCrop((224, 224), padding=4),
        transforms.RandomCrop((32, 32), padding=4),
        transforms.RandomVerticalFlip(0.5),
        transforms.RandomHorizontalFlip(0.5),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
    ]
)

train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)

test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)

Files already downloaded and verified
Files already downloaded and verified


# Modeling

## DenseLayer

In [3]:
import torch.utils.checkpoint as cp


class DenseLayer(torch.nn.Module):
    def __init__(self, in_channels, growth_rate, bn_size):
        super(DenseLayer, self).__init__()
        self.conv_block1 = nn.Sequential(
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels, bn_size * growth_rate,
                      kernel_size=1, stride=1, bias=False),
        )
        self.conv_block2 = nn.Sequential(
            nn.BatchNorm2d(bn_size * growth_rate),
            nn.ReLU(inplace=True),
            nn.Conv2d(bn_size * growth_rate, growth_rate,
                      kernel_size=3, stride=1, padding=1, bias=False),
        )

    def forward(self, x):
        out = self.conv_block1(x)
        out = self.conv_block2(out)
        return torch.cat((x, out), 1)

## DenseBlock

In [4]:
class DenseBlock(torch.nn.Module):
    def __init__(self, in_channels, num_layers, growth_rate, bn_size):
        super(DenseBlock, self).__init__()
        self.layers = nn.ModuleList([
            DenseLayer(in_channels + i * growth_rate, growth_rate, bn_size)
            for i in range(num_layers)
        ])

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        return x

## TransitionLayer

In [5]:
class TransitionLayer(torch.nn.Module):
    def __init__(self, in_channels):
        super(TransitionLayer, self).__init__()
        self.bn = nn.BatchNorm2d(in_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv = nn.Conv2d(in_channels, in_channels // 2,
                              kernel_size=1, stride=1, bias=False)
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        x = self.bn(x)
        x = self.relu(x)
        x = self.conv(x)
        x = self.pool(x)
        return x

## DenseNet

In [6]:
import torch.nn.functional as F


class DenseNet(nn.Module):
    def __init__(self, growth_rate, num_layers, num_classes, bn_size):
        super(DenseNet, self).__init__()
        self.bn = nn.BatchNorm2d(3)
        self.relu = nn.ReLU(inplace=True)
        self.conv = nn.Conv2d(3, 112,
                              kernel_size=7, stride=2, padding=3, bias=False)
        self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        dense_blocks = []
        features = 112
        for num_layer in num_layers:
            dense_blocks.append(DenseBlock(features, num_layer, growth_rate, bn_size))
            if num_layer != num_layers[-1]:  
                dense_blocks.append(TransitionLayer(features + num_layer * growth_rate))
                features = (features + num_layer * growth_rate) // 2

        self.dense_blocks = nn.Sequential(*dense_blocks)

        self.avg_pool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.fc = nn.Linear(1030, num_classes)
        print(features)

    def forward(self, x):
        x = self.bn(x)
        x = self.relu(x)
        x = self.conv(x)
        x = self.max_pool(x)
        x = self.dense_blocks(x)
        x = F.relu(x, inplace=True)
        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = torch.flatten(x, 1)
        x = self.fc(x)
        x = torch.softmax(x, dim=1)
        return x


# Training

In [7]:
import tqdm
import torch.optim as optim
from torch.cuda.amp import GradScaler, autocast

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DenseNet(
    growth_rate=32,
    num_layers=[6, 12, 24, 16],
    num_classes=10,
    bn_size=4
)
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


num_epochs = 10
for epoch in range(num_epochs):
    iterator = tqdm.tqdm(train_loader)
    model.train()
    for images, labels in iterator:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        iterator.set_description(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

    torch.cuda.empty_cache()



518


Epoch [1/10], Loss: 2.1396: 100%|██████████| 782/782 [00:41<00:00, 18.96it/s]
Epoch [2/10], Loss: 2.2114: 100%|██████████| 782/782 [00:41<00:00, 18.91it/s]
Epoch [3/10], Loss: 2.1727: 100%|██████████| 782/782 [00:40<00:00, 19.10it/s]
Epoch [4/10], Loss: 2.0111: 100%|██████████| 782/782 [00:41<00:00, 19.07it/s]
Epoch [5/10], Loss: 2.1443: 100%|██████████| 782/782 [00:40<00:00, 19.13it/s]
Epoch [6/10], Loss: 2.0263: 100%|██████████| 782/782 [00:40<00:00, 19.15it/s]
Epoch [7/10], Loss: 2.3249: 100%|██████████| 782/782 [00:41<00:00, 19.04it/s]
Epoch [8/10], Loss: 1.9489: 100%|██████████| 782/782 [00:41<00:00, 18.96it/s]
Epoch [9/10], Loss: 1.8978: 100%|██████████| 782/782 [00:41<00:00, 19.03it/s]
Epoch [10/10], Loss: 2.0878: 100%|██████████| 782/782 [00:40<00:00, 19.13it/s]


# Testing

In [8]:
model.eval()
with torch.no_grad():
    total = 0
    correct = 0
    iterator = tqdm.tqdm(test_loader)
    for images, labels in iterator:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'\nAccuracy of the model on the test images: {100 * correct / total:.2f}%')


100%|██████████| 157/157 [00:10<00:00, 14.31it/s]


Accuracy of the model on the test images: 47.29%



