# CNN

In [1]:
# 0. 작업 준비
import numpy as numpy
import matplotlib.pyplot as plt

import torch
from torch.utils import data
from torchvision import datasets, transforms, utils
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device('cuda' if USE_CUDA else 'cpu')

In [3]:
transform = transforms.Compose([transforms.ToTensor()])

In [8]:
tr_ds = datasets.FashionMNIST(root='../data/',
                              train=True,
                              download=False,
                              transform=transform)

In [9]:
BATCH_SIZE = 60000
tr_ds_loader = torch.utils.data.DataLoader(
    dataset = tr_ds,
    batch_size = BATCH_SIZE,
    shuffle = False
)
img, _ = next(iter(tr_ds_loader))
img.shape

torch.Size([60000, 1, 28, 28])

In [10]:
img.mean(), img.std()

(tensor(0.2860), tensor(0.3530))

In [22]:
BATCH_SIZE = 64
EPOCHS = 10

In [23]:
transform = transforms.Compose([
    transforms.ToTensor(), # Tensor화
    transforms.Normalize(0.2860, 0.3530) # torch는 Normalize 전에 Tensor화 돼야 함!
])

tr_ds_loader = torch.utils.data.DataLoader(
    datasets.FashionMNIST(
        root='../data/',
        train=True,
        download=False,
        transform=transform
    ),
    batch_size=BATCH_SIZE,
    shuffle=True
)

tt_ds_loader = torch.utils.data.DataLoader(
    datasets.FashionMNIST(
        root='../data/',
        train=False,
        download=False,
        transform=transform
    ),
    batch_size=BATCH_SIZE,
    shuffle=False
)

$$O=\frac{I-F+2P}{S}+1$$

```py
28 * 28 * 1 # 입력 사이즈
-> 24 * 24 * 10
-> 12 * 12 * 10
    -> 8 * 8 * 20
    -> 4 * 4 * 20 = 320

    # 또는 padding=same 일 때
    -> 12 * 12 * 20
    -> 6 * 6 * 20 = 720

# --------------------------

# 또는 padding=same 일 때
28 * 28 * 1
-> 28 * 28 * 10  
-> 14 * 14 * 10  
    -> 10 * 10 * 20  
    -> 5 * 5 * 20 = 500

    # 또는 padding=same 일 때
    -> 14 * 14 * 20  
    -> 7 * 7 * 20 = 980
```

In [24]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)
    def forward(self, x):
        x = F.max_pool2d(self.conv1(x), 2)
        x = F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return x

In [25]:
model = Model().to(DEVICE)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
# 학습에서 스케줄 계획하는 건 학습률 외엔 거의 없음.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1) # 가급적 에폭마다 스케줄

In [38]:
def train(model, tr_ds_loader, optimizer):
    model.train()
    running_loss = 0.
    total = 0
    correct = 0
    for (x, y) in tr_ds_loader:
        data, target = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()

        total += len(data)
        running_loss += loss.item() * len(data)
        train_loss = running_loss / total
        correct += (output.argmax(dim=1) == target).sum().item()
        train_accuracy = correct / total

        tr_ds_loader.set_postfix({
            'train_loss': f'{train_loss:.4f}',
            'train_accuracy': f'{train_accuracy * 100:.2f}%'
        })

In [41]:
@torch.no_grad()
def evaluate(model, tt_ds_loader):
    model.eval()
    test_loss = 0.
    correct = 0
    for data, target in tt_ds_loader:
        data, target = data.to(DEVICE), target.to(DEVICE)
        output = model(data)
        test_loss += F.cross_entropy(output, target).item() * len(data)
        correct += (output.argmax(dim=1) == target).sum().item()
    test_loss /= len(tt_ds_loader.dataset)
    test_accuracy = correct / len(tt_ds_loader.dataset)
    return test_loss, test_accuracy

In [44]:
from tqdm import tqdm
EPOCHS = 3
for epoch in range(1, EPOCHS+1):
    train_bar = tqdm(
        tr_ds_loader,
        desc=f'[Epochs: {epoch}]',
        leave=True
        )
    train(model, train_bar, optimizer)
    scheduler.step()
    test_loss, test_accuracy = evaluate(model, tt_ds_loader)

    tqdm.write(
        f'test_loss: {test_loss:.4f}, '
        f'test_accuracy: {test_accuracy * 100:.2f}%, '
        f'lr_scheduler{scheduler.get_last_lr()}'
        '\n'
    )

[Epochs: 1]: 100%|██████████| 938/938 [00:16<00:00, 56.57it/s, train_loss=0.4908, train_accuracy=82.39%]


test_loss: 0.3798, test_accuracy: 86.09%, lr_scheduler[0.001]



[Epochs: 2]: 100%|██████████| 938/938 [00:17<00:00, 52.30it/s, train_loss=0.4887, train_accuracy=82.65%]


test_loss: 0.3796, test_accuracy: 86.20%, lr_scheduler[0.001]



[Epochs: 3]: 100%|██████████| 938/938 [00:16<00:00, 57.31it/s, train_loss=0.4857, train_accuracy=82.75%]


test_loss: 0.3768, test_accuracy: 86.23%, lr_scheduler[0.0001]



# CIFAR 10

In [2]:
ck_tr = transforms.Compose([transforms.ToTensor()])

tr_ds = datasets.CIFAR10(root='../data/',
                 train=True,
                 download=True,
                 transform=ck_tr)

tr_ds_loader = torch.utils.data.DataLoader(tr_ds,
                                           batch_size=50000,
                                           shuffle=False)

In [3]:
ck_data = iter(tr_ds_loader)
data, _ = next(ck_data)
data.shape

torch.Size([50000, 3, 32, 32])

In [4]:
print(data[:, 0].mean(), data[:, 0].std())
print(data[:, 1].mean(), data[:, 1].std())
print(data[:, 2].mean(), data[:, 2].std())

tensor(0.4914) tensor(0.2470)
tensor(0.4822) tensor(0.2435)
tensor(0.4465) tensor(0.2616)


In [5]:
# 위의 코드를 한 번에!
data.mean(dim=[0, 2, 3]), data.std(dim=[0, 2, 3]) # 0,2,3번 차원 전체를 뭉텅이로 축소하고 1번 차원(channel)만 남겨 계산

(tensor([0.4914, 0.4822, 0.4465]), tensor([0.2470, 0.2435, 0.2616]))

In [6]:
# 상수 설정
BATCH_SIZE = 64
EPOCHS = 10

In [None]:
# 데이터 수정 (노이즈 삽입)
# 1. 데이터 준비
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))
])

tr_ds_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10(
        root='../data/',
        train=True,
        download=False,
        transform=transform
    ),
    batch_size=BATCH_SIZE,
    shuffle=True
)

tt_ds_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10(
        root='../data/',
        train=False,
        download=False,
        transform=transform
    ),
    batch_size=BATCH_SIZE,
    shuffle=False
)

In [21]:
x, y = next(iter(tr_ds_loader))
len(x)

64

In [None]:
class CNN_Model(nn.Module):
    def __init__(self, in_channels):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, in_channels*2, 3) # 6, 30, 30
        self.bn1 = nn.BatchNorm2d(in_channels*2)
        self.relu = nn.ReLU()
        self.mp = nn.MaxPool2d(2, 2) # 6, 15, 15
        self.conv2 = nn.Conv2d(in_channels*2, in_channels*4, 3) # 12, 13, 13
        self.bn2 = nn.BatchNorm2d(in_channels*4)
        self.conv3 = nn.Conv2d(in_channels*4, in_channels*8, 3)
        self.bn3 = nn.BatchNorm2d(in_channels*8)
        self.fc = nn.Linear(in_channels*8*2*2, 10)

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x))) # 6, 30, 30
        out = self.mp(out) # 6, 15, 15
        out = self.relu(self.bn2(self.conv2(out))) # 12, 13, 13
        out = self.mp(out) # 12, 6, 6
        out = self.relu(self.bn3(self.conv3(out))) # 24, 4, 4
        out = self.mp(out) # 24, 2, 2
        out = out.view(-1, 24*2*2)
        out = self.fc(out)
        return out
    
model = CNN_Model(3).to(DEVICE)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0005)
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=3, gamma=0.1)

In [37]:
def train(model, tr_ds_loader, optimizer):
    model.train()
    running_loss = 0.
    total = 0
    correct = 0
    for (x, y) in tr_ds_loader:
        data, target = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()

        total += len(data)
        running_loss += loss.item() * len(data)
        train_loss = running_loss / total
        correct += (output.argmax(dim=1) == target).sum().item()
        train_accuracy = correct / total

        tr_ds_loader.set_postfix({
            'train_loss': f'{train_loss:.4f}',
            'train_accuracy': f'{train_accuracy * 100:.2f}%'
        })

In [38]:
@torch.no_grad()
def evaluate(model, tt_ds_loader):
    model.eval()
    test_loss = 0.
    correct = 0
    for data, target in tt_ds_loader:
        data, target = data.to(DEVICE), target.to(DEVICE)
        output = model(data)
        test_loss += F.cross_entropy(output, target).item() * len(data)
        correct += (output.argmax(dim=1) == target).sum().item()
    test_loss /= len(tt_ds_loader.dataset)
    test_accuracy = correct / len(tt_ds_loader.dataset)
    return test_loss, test_accuracy

In [39]:
from tqdm import tqdm
EPOCHS = 3
for epoch in range(1, EPOCHS+1):
    train_bar = tqdm(
        tr_ds_loader,
        desc=f'[Epochs: {epoch}]',
        leave=True
        )
    train(model, train_bar, optimizer)
    scheduler.step()
    test_loss, test_accuracy = evaluate(model, tt_ds_loader)

    tqdm.write(
        f'test_loss: {test_loss:.4f}, '
        f'test_accuracy: {test_accuracy * 100:.2f}%, '
        f'lr_scheduler: {scheduler.get_last_lr()[0]:.4f}' # scheduler.get_last_lr()가 [0.1] 이런 식으로 리스트로 return됨
        '\n'
    )

[Epochs: 1]: 100%|██████████| 782/782 [00:13<00:00, 56.76it/s, train_loss=1.5621, train_accuracy=43.34%]


test_loss: 1.4605, test_accuracy: 48.42%, lr_scheduler: 0.1000



[Epochs: 2]: 100%|██████████| 782/782 [00:14<00:00, 55.07it/s, train_loss=1.3375, train_accuracy=52.50%]


test_loss: 1.3064, test_accuracy: 54.34%, lr_scheduler: 0.1000



[Epochs: 3]: 100%|██████████| 782/782 [00:15<00:00, 51.56it/s, train_loss=1.2734, train_accuracy=54.91%]


test_loss: 1.3322, test_accuracy: 52.75%, lr_scheduler: 0.0100



---
ResNet 코드 연습

In [None]:
import torch.nn as nn
shortcut = nn.Sequential()
shortcut((nn.Linear(5, 3), nn.Linear(3, 10)))


(Linear(in_features=5, out_features=3, bias=True),
 Linear(in_features=3, out_features=10, bias=True))

In [None]:
shortcut(nn.Linear(5, 10)) + nn.Sequential(nn.Linear(5, 10)) # layer 쟈체끼리의 덧셈은 불가능

TypeError: unsupported operand type(s) for +: 'Linear' and 'Sequential'

In [None]:
# 초기화 시 (레이어 정의)
shortcut = nn.Sequential()  # 또는 Conv2d 등

x = torch.randn(1, 64, 32, 32)

# forward 시 (데이터 처리)
out = nn.Conv2d(64, 64, 3, padding=1)(x) + shortcut(x) # layer의 출력끼리 덧셈은 가능
#     ──────────────────────────────────   ───────────
#                  텐서 결과                 텐서 결과
out.shape

torch.Size([1, 64, 32, 32])

In [9]:
class BasicBlock(nn.Module):
    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)

        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False), 
                nn.BatchNorm2d(planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out
    
BasicBlock(3, 3, 2)

BasicBlock(
  (conv1): Conv2d(3, 3, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn2): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (shortcut): Sequential(
    (0): Conv2d(3, 3, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
)

In [41]:
stride = 1
num_blocks = 2
strides = [stride] + [1] * (num_blocks-1)
strides

[1, 1]

In [42]:
stride = 1
num_blocks = 3
strides = [stride] + [1] * (num_blocks-1)
strides

[1, 1, 1]

In [43]:
stride = 2
num_blocks = 2
strides = [stride] + [1] * (num_blocks-1)
strides

[2, 1]

---

# ResNet

In [None]:
# 2. 모델 설계
class BasicBlock(nn.Module):
    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)

        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) # same padding
        self.bn2 = nn.BatchNorm2d(planes)

        # out += x 시  이미지 크기나 채널 수가 다를 경우 shortcut으로 shape 맞춘 후 연산.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes: # stride가 1이 아니거나 입력과 출력 채널수가 다를 때 
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False), 
                nn.BatchNorm2d(planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out
    
class ResNet(nn.Module): # 층이 14개이므로 ResNet-14
    def __init__(self, class_n):
        super().__init__()
        self.in_planes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16) # 16, 32, 32
        self.l1 = self._make_l(16, 2, 1) # 16, 32, 32
        self.l2 = self._make_l(32, 2, 1) # 32, 32, 32
        self.l3 = self._make_l(64, 2, 1) # 64, 32, 32
        self.out_l = nn.Linear(1024, class_n) # avgpool 때문에 64*(32/8)*(32/8) = 1024

    def _make_l(self, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks-1)
        l = []
        for stride in strides:
            l.append(BasicBlock(self.in_planes, planes, stride))
            self.in_planes = planes
        return nn.Sequential(*l)
    
    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.l1(x)
        x = self.l2(x)
        x = self.l3(x)

        # 분류기 층
        x = F.avg_pool2d(x, 8)
        x = x.view(x.size(0), -1)
        out = self.out_l(x)
        return out

In [64]:
model = ResNet(10).to(DEVICE)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0005)
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=3, gamma=0.1)

In [65]:
model

ResNet(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (l1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True

In [66]:
def train(model, tr_ds_loader, optimizer):
    model.train()
    running_loss = 0.
    total = 0
    correct = 0
    for (x, y) in tr_ds_loader:
        data, target = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        
        total += len(data)
        running_loss += loss.item() * len(data)
        pred = (output.argmax(dim=1) == target)
        correct += pred.sum().item()

        train_loss = running_loss / total
        train_accuracy = correct / total

        tr_ds_loader.set_postfix({
            'train_loss': f'{train_loss:.4f}',
            'train_accuracy': f'{train_accuracy*100:.2f}%'
        })

In [67]:
@torch.no_grad()
def evaluate(model, tt_ds_loader):
    model.eval()
    test_loss = 0.
    correct = 0
    for x, y in tt_ds_loader:
        data, target = x.to(DEVICE), y.to(DEVICE)
        output = model(data)
        loss = F.cross_entropy(output, target).item()
        test_loss += loss * len(data)
        pred = (output.argmax(dim=1) == target)
        correct += pred.sum().item()
    test_loss /= len(tt_ds_loader.dataset)
    test_accuracy = correct / len(tt_ds_loader.dataset)
    return test_loss, test_accuracy

In [None]:
from tqdm import tqdm
EPOCHS = 20
for epoch in range(1, EPOCHS+1):
    train_bar = tqdm(
        tr_ds_loader,
        desc=f'[Epochs: {epoch}/{EPOCHS}]',
        leave=True
    )
    train(model, train_bar, optimizer)
    scheduler.step()
    test_loss, test_accuracy = evaluate(model, tt_ds_loader)
    tqdm.write(
        f'test_loss: {test_loss:.4f}, '
        f'test_accuracy: {test_accuracy * 100:.2f}%, '
        f'lr: {scheduler.get_last_lr()}'
    )

[Epochs: 1/20]: 100%|██████████| 782/782 [00:44<00:00, 17.51it/s, train_loss=2.1054, train_accuracy=24.59%]


test_loss: 1.8224, test_accuracy: 35.56%, lr: [0.1]


[Epochs: 2/20]: 100%|██████████| 782/782 [00:42<00:00, 18.47it/s, train_loss=1.6438, train_accuracy=40.32%]


test_loss: 1.6426, test_accuracy: 41.25%, lr: [0.1]


[Epochs: 3/20]: 100%|██████████| 782/782 [00:42<00:00, 18.49it/s, train_loss=1.4402, train_accuracy=48.71%]


test_loss: 1.4143, test_accuracy: 47.68%, lr: [0.010000000000000002]


[Epochs: 4/20]: 100%|██████████| 782/782 [00:42<00:00, 18.58it/s, train_loss=1.0878, train_accuracy=61.84%]


test_loss: 1.0760, test_accuracy: 61.89%, lr: [0.010000000000000002]


[Epochs: 5/20]: 100%|██████████| 782/782 [00:42<00:00, 18.41it/s, train_loss=1.0094, train_accuracy=64.31%]


test_loss: 0.9864, test_accuracy: 65.11%, lr: [0.010000000000000002]


[Epochs: 6/20]: 100%|██████████| 782/782 [00:42<00:00, 18.39it/s, train_loss=0.9514, train_accuracy=66.54%]


test_loss: 0.9516, test_accuracy: 66.73%, lr: [0.0010000000000000002]


[Epochs: 7/20]: 100%|██████████| 782/782 [00:42<00:00, 18.37it/s, train_loss=0.8668, train_accuracy=69.73%]


test_loss: 0.8774, test_accuracy: 69.00%, lr: [0.0010000000000000002]


[Epochs: 8/20]: 100%|██████████| 782/782 [00:42<00:00, 18.33it/s, train_loss=0.8508, train_accuracy=70.39%]


test_loss: 0.8675, test_accuracy: 69.33%, lr: [0.0010000000000000002]


[Epochs: 9/20]: 100%|██████████| 782/782 [00:42<00:00, 18.58it/s, train_loss=0.8394, train_accuracy=70.80%]


test_loss: 0.8608, test_accuracy: 69.80%, lr: [0.00010000000000000003]


[Epochs: 10/20]: 100%|██████████| 782/782 [00:42<00:00, 18.59it/s, train_loss=0.8258, train_accuracy=71.24%]


test_loss: 0.8525, test_accuracy: 69.89%, lr: [0.00010000000000000003]


[Epochs: 11/20]: 100%|██████████| 782/782 [00:42<00:00, 18.61it/s, train_loss=0.8221, train_accuracy=71.36%]


test_loss: 0.8513, test_accuracy: 69.90%, lr: [0.00010000000000000003]


[Epochs: 12/20]: 100%|██████████| 782/782 [00:42<00:00, 18.34it/s, train_loss=0.8208, train_accuracy=71.34%]


test_loss: 0.8509, test_accuracy: 70.02%, lr: [1.0000000000000004e-05]


[Epochs: 13/20]: 100%|██████████| 782/782 [00:42<00:00, 18.47it/s, train_loss=0.8209, train_accuracy=71.19%]


test_loss: 0.8507, test_accuracy: 69.95%, lr: [1.0000000000000004e-05]


[Epochs: 14/20]: 100%|██████████| 782/782 [00:42<00:00, 18.34it/s, train_loss=0.8200, train_accuracy=71.49%]


test_loss: 0.8513, test_accuracy: 69.94%, lr: [1.0000000000000004e-05]


[Epochs: 15/20]: 100%|██████████| 782/782 [00:42<00:00, 18.26it/s, train_loss=0.8191, train_accuracy=71.35%]


test_loss: 0.8509, test_accuracy: 69.99%, lr: [1.0000000000000004e-06]


[Epochs: 16/20]:  95%|█████████▌| 744/782 [00:40<00:02, 18.72it/s, train_loss=0.8198, train_accuracy=71.48%]

---
코랩에서 공식 ResNet이랑 유사하게 구조 변경해서 재학습

shortcut을 pointwise convolution으로 변경 및 _make_layer로 생성되는 l2, l3 layer의 stride=2로 변경

In [None]:
# 0. 작업 준비
import numpy as numpy
import matplotlib.pyplot as plt

import torch
from torch.utils import data
from torchvision import datasets, transforms, utils
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from tqdm import tqdm


USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device('cuda' if USE_CUDA else 'cpu')
print(f'Using device: {DEVICE}')

Using device: cuda


In [None]:
ck_tr = transforms.Compose([transforms.ToTensor()])

tr_ds = datasets.CIFAR10(root='./data/',
                 train=True,
                 download=True,
                 transform=ck_tr)

tr_ds_loader = torch.utils.data.DataLoader(tr_ds,
                                           batch_size=50000,
                                           shuffle=False)

100%|██████████| 170M/170M [00:13<00:00, 12.8MB/s]


In [None]:
ck_data = iter(tr_ds_loader)
data, _ = next(ck_data)
data.shape

torch.Size([50000, 3, 32, 32])

In [None]:
print(data[:, 0].mean(), data[:, 0].std())
print(data[:, 1].mean(), data[:, 1].std())
print(data[:, 2].mean(), data[:, 2].std())

tensor(0.4914) tensor(0.2470)
tensor(0.4822) tensor(0.2435)
tensor(0.4465) tensor(0.2616)


In [None]:
# 위의 코드를 한 번에!
data.mean(dim=[0, 2, 3]), data.std(dim=[0, 2, 3]) # 0,2,3번 차원 전체를 뭉텅이로 축소하고 1번 차원(channel)만 남겨 계산

(tensor([0.4914, 0.4822, 0.4465]), tensor([0.2470, 0.2435, 0.2616]))

In [None]:
# 상수 설정
if USE_CUDA:
    BATCH_SIZE = 128
else:
    BATCH_SIZE = 64
print(f'BATCH_SIZE: {BATCH_SIZE}')

BATCH_SIZE: 128


In [None]:
# 데이터 수정 (노이즈 삽입)
# 1. 데이터 준비
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),  # 데이터 증강
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))
])

tr_ds_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10(
        root='./data/',
        train=True,
        download=False,
        transform=train_transform
    ),
    batch_size=BATCH_SIZE,
    shuffle=True
)

tt_ds_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10(
        root='./data/',
        train=False,
        download=False,
        transform=test_transform
    ),
    batch_size=BATCH_SIZE,
    shuffle=False
)

# ResNet-14
공식적으로 ResNet에서 14layer 모델은 없음!

In [None]:
# 2. 모델 설계
class BasicBlock(nn.Module):
    expansion = 1
    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)

        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes * self.expansion:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes * self.expansion, kernel_size=1, stride=stride, padding=0, bias=False), # Shortcut은 spatial pattern 학습보다는 feature map의 차원 정렬과 downsampling을 위한 projection 역할(잔차 연결을 위한 지름길 역할)을 하므로 pointwise convolution을 사용
                nn.BatchNorm2d(planes * self.expansion)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet(nn.Module):
    def __init__(self, class_n):
        super().__init__()
        self.in_planes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False) # 16, 32, 32
        self.bn1 = nn.BatchNorm2d(16)
        self.l1 = self._make_layer(16, 2, 1) # 16, 32, 32
        self.l2 = self._make_layer(32, 2, 2) # 32, 16, 16
        self.l3 = self._make_layer(64, 2, 2) # 64, 8, 8
        self.out_l = nn.Linear(64 * BasicBlock.expansion, class_n) # 64*1*1

    def _make_layer(self, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        l = []
        for stride in strides:
            l.append(BasicBlock(self.in_planes, planes, stride))
            self.in_planes = planes
        return nn.Sequential(*l)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.l1(x)
        x = self.l2(x)
        x = self.l3(x)

        # 분류기 층
        # x = F.avg_pool2d(x, 8)
        x = F.adaptive_avg_pool2d(x, 1) # 입력 크기와 무관하게 (1, 1)로 안전하게 pooling
        x = x.view(x.size(0), -1)
        out = self.out_l(x)
        return out

In [None]:
model = ResNet(10).to(DEVICE)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0005)
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=3, gamma=0.1)

In [None]:
def train(model, tr_ds_loader, optimizer):
    model.train()
    running_loss = 0.
    total = 0
    correct = 0
    for (x, y) in tr_ds_loader:
        data, target = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()

        total += len(data)
        running_loss += loss.item() * len(data)
        pred = (output.argmax(dim=1) == target)
        correct += pred.sum().item()

        train_loss = running_loss / total
        train_accuracy = correct / total

        tr_ds_loader.set_postfix({
            'train_loss': f'{train_loss:.4f}',
            'train_accuracy': f'{train_accuracy*100:.2f}%'
        })

In [None]:
@torch.no_grad()
def evaluate(model, tt_ds_loader):
    model.eval()
    test_loss = 0.
    correct = 0
    for x, y in tt_ds_loader:
        data, target = x.to(DEVICE), y.to(DEVICE)
        output = model(data)
        loss = F.cross_entropy(output, target).item()
        test_loss += loss * len(data)
        pred = (output.argmax(dim=1) == target)
        correct += pred.sum().item()
    test_loss /= len(tt_ds_loader.dataset)
    test_accuracy = correct / len(tt_ds_loader.dataset)
    return test_loss, test_accuracy

In [None]:
from tqdm import tqdm
EPOCHS = 160
ES_PATIENCE = 5
best_loss = float('inf')
es_patience = 0
for epoch in range(1, EPOCHS+1):

    train_bar = tqdm(
        tr_ds_loader,
        desc=f'[Epochs: {epoch}/{EPOCHS}]',
        leave=True
    )
    train(model, train_bar, optimizer)
    scheduler.step()
    test_loss, test_accuracy = evaluate(model, tt_ds_loader)
    tqdm.write(
        f'test_loss: {test_loss:.4f}, '
        f'test_accuracy: {test_accuracy * 100:.2f}%, '
        f'lr: {scheduler.get_last_lr()[0]:.4f}, '
        f'ES_Patience: {es_patience}/{ES_PATIENCE}'
    )

    if best_loss > test_loss:
        best_loss = test_loss
        es_patience = 0
    else:
        es_patience += 1
    if es_patience >= ES_PATIENCE:
        print('Early Stopping이 동작하였습니다.')
        break

[Epochs: 1/160]: 100%|██████████| 391/391 [00:32<00:00, 12.04it/s, train_loss=1.6143, train_accuracy=39.23%]


test_loss: 1.4182, test_accuracy: 49.08%, lr: 0.1000, ES_Patience: 0/5


[Epochs: 2/160]: 100%|██████████| 391/391 [00:29<00:00, 13.10it/s, train_loss=1.1545, train_accuracy=58.08%]


test_loss: 1.0657, test_accuracy: 61.14%, lr: 0.1000, ES_Patience: 0/5


[Epochs: 3/160]: 100%|██████████| 391/391 [00:30<00:00, 13.02it/s, train_loss=0.9363, train_accuracy=66.79%]


test_loss: 0.9641, test_accuracy: 66.66%, lr: 0.0100, ES_Patience: 0/5


[Epochs: 4/160]: 100%|██████████| 391/391 [00:30<00:00, 12.95it/s, train_loss=0.6927, train_accuracy=75.85%]


test_loss: 0.6639, test_accuracy: 76.82%, lr: 0.0100, ES_Patience: 0/5


[Epochs: 5/160]: 100%|██████████| 391/391 [00:29<00:00, 13.11it/s, train_loss=0.6387, train_accuracy=77.82%]


test_loss: 0.6422, test_accuracy: 77.77%, lr: 0.0100, ES_Patience: 0/5


[Epochs: 6/160]: 100%|██████████| 391/391 [00:29<00:00, 13.04it/s, train_loss=0.6103, train_accuracy=78.64%]


test_loss: 0.6022, test_accuracy: 79.25%, lr: 0.0010, ES_Patience: 0/5


[Epochs: 7/160]: 100%|██████████| 391/391 [00:30<00:00, 12.88it/s, train_loss=0.5671, train_accuracy=80.22%]


test_loss: 0.5787, test_accuracy: 80.17%, lr: 0.0010, ES_Patience: 0/5


[Epochs: 8/160]: 100%|██████████| 391/391 [00:29<00:00, 13.04it/s, train_loss=0.5604, train_accuracy=80.63%]


test_loss: 0.5770, test_accuracy: 80.12%, lr: 0.0010, ES_Patience: 0/5


[Epochs: 9/160]: 100%|██████████| 391/391 [00:29<00:00, 13.09it/s, train_loss=0.5531, train_accuracy=80.82%]


test_loss: 0.5716, test_accuracy: 80.11%, lr: 0.0001, ES_Patience: 0/5


[Epochs: 10/160]: 100%|██████████| 391/391 [00:30<00:00, 12.94it/s, train_loss=0.5475, train_accuracy=81.02%]


test_loss: 0.5712, test_accuracy: 80.27%, lr: 0.0001, ES_Patience: 0/5


[Epochs: 11/160]: 100%|██████████| 391/391 [00:30<00:00, 12.86it/s, train_loss=0.5441, train_accuracy=81.14%]


test_loss: 0.5682, test_accuracy: 80.46%, lr: 0.0001, ES_Patience: 0/5


[Epochs: 12/160]: 100%|██████████| 391/391 [00:30<00:00, 12.90it/s, train_loss=0.5484, train_accuracy=80.97%]


test_loss: 0.5693, test_accuracy: 80.40%, lr: 0.0000, ES_Patience: 0/5


[Epochs: 13/160]: 100%|██████████| 391/391 [00:30<00:00, 12.99it/s, train_loss=0.5467, train_accuracy=81.14%]


test_loss: 0.5698, test_accuracy: 80.35%, lr: 0.0000, ES_Patience: 1/5


[Epochs: 14/160]: 100%|██████████| 391/391 [00:30<00:00, 12.67it/s, train_loss=0.5478, train_accuracy=80.96%]


test_loss: 0.5696, test_accuracy: 80.39%, lr: 0.0000, ES_Patience: 2/5


[Epochs: 15/160]: 100%|██████████| 391/391 [00:30<00:00, 12.77it/s, train_loss=0.5487, train_accuracy=80.91%]


test_loss: 0.5704, test_accuracy: 80.36%, lr: 0.0000, ES_Patience: 3/5


[Epochs: 16/160]: 100%|██████████| 391/391 [00:30<00:00, 12.88it/s, train_loss=0.5449, train_accuracy=81.01%]


test_loss: 0.5693, test_accuracy: 80.43%, lr: 0.0000, ES_Patience: 4/5
Early Stopping이 동작하였습니다.


In [74]:
import torch, os, psutil

print("시스템 전체 논리 코어 수:", os.cpu_count())
print("시스템 전체 물리 코어 수:", psutil.cpu_count(logical=False))
print("PyTorch로 설정한 스레드 수:", torch.get_num_threads())

시스템 전체 논리 코어 수: 8
시스템 전체 물리 코어 수: 4
PyTorch로 설정한 스레드 수: 100


In [75]:
torch.set_num_threads(12)
print("PyTorch로 설정한 스레드 수:", torch.get_num_threads()) # 실제 8개가 최대라서 12개로 하면 오버헤드 걸림.

PyTorch로 설정한 스레드 수: 12


In [76]:
torch.set_num_threads(100)
print("PyTorch로 설정한 스레드 수:", torch.get_num_threads()) # 실제 8개가 최대라서 100개로 하면 오버헤드 걸림.

PyTorch로 설정한 스레드 수: 100
