# CNN

In [1]:
# 0. 작업 준비
import numpy as numpy
import matplotlib.pyplot as plt

import torch
from torch.utils import data
from torchvision import datasets, transforms, utils
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device('cuda' if USE_CUDA else 'cpu')

In [3]:
transform = transforms.Compose([transforms.ToTensor()])

In [8]:
tr_ds = datasets.FashionMNIST(root='../data/',
                              train=True,
                              download=False,
                              transform=transform)

In [9]:
BATCH_SIZE = 60000
tr_ds_loader = torch.utils.data.DataLoader(
    dataset = tr_ds,
    batch_size = BATCH_SIZE,
    shuffle = False
)
img, _ = next(iter(tr_ds_loader))
img.shape

torch.Size([60000, 1, 28, 28])

In [10]:
img.mean(), img.std()

(tensor(0.2860), tensor(0.3530))

In [22]:
BATCH_SIZE = 64
EPOCHS = 10

In [23]:
transform = transforms.Compose([
    transforms.ToTensor(), # Tensor화
    transforms.Normalize(0.2860, 0.3530) # torch는 Normalize 전에 Tensor화 돼야 함!
])

tr_ds_loader = torch.utils.data.DataLoader(
    datasets.FashionMNIST(
        root='../data/',
        train=True,
        download=False,
        transform=transform
    ),
    batch_size=BATCH_SIZE,
    shuffle=True
)

tt_ds_loader = torch.utils.data.DataLoader(
    datasets.FashionMNIST(
        root='../data/',
        train=False,
        download=False,
        transform=transform
    ),
    batch_size=BATCH_SIZE,
    shuffle=False
)

$$O=\frac{I-F+2P}{S}+1$$

```py
28 * 28 * 1 # 입력 사이즈
-> 24 * 24 * 10
-> 12 * 12 * 10
    -> 8 * 8 * 20
    -> 4 * 4 * 20 = 320

    # 또는 padding=same 일 때
    -> 12 * 12 * 20
    -> 6 * 6 * 20 = 720

# --------------------------

# 또는 padding=same 일 때
28 * 28 * 1
-> 28 * 28 * 10  
-> 14 * 14 * 10  
    -> 10 * 10 * 20  
    -> 5 * 5 * 20 = 500

    # 또는 padding=same 일 때
    -> 14 * 14 * 20  
    -> 7 * 7 * 20 = 980
```

In [24]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)
    def forward(self, x):
        x = F.max_pool2d(self.conv1(x), 2)
        x = F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return x

In [25]:
model = Model().to(DEVICE)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
# 학습에서 스케줄 계획하는 건 학습률 외엔 거의 없음.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1) # 가급적 에폭마다 스케줄

In [38]:
def train(model, tr_ds_loader, optimizer):
    model.train()
    running_loss = 0.
    total = 0
    correct = 0
    for (x, y) in tr_ds_loader:
        data, target = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()

        total += len(data)
        running_loss += loss.item() * len(data)
        train_loss = running_loss / total
        correct += (output.argmax(dim=1) == target).sum().item()
        train_accuracy = correct / total

        tr_ds_loader.set_postfix({
            'train_loss': f'{train_loss:.4f}',
            'train_accuracy': f'{train_accuracy * 100:.2f}%'
        })

In [41]:
@torch.no_grad()
def evaluate(model, tt_ds_loader):
    model.eval()
    test_loss = 0.
    correct = 0
    for data, target in tt_ds_loader:
        data, target = data.to(DEVICE), target.to(DEVICE)
        output = model(data)
        test_loss += F.cross_entropy(output, target).item() * len(data)
        correct += (output.argmax(dim=1) == target).sum().item()
    test_loss /= len(tt_ds_loader.dataset)
    test_accuracy = correct / len(tt_ds_loader.dataset)
    return test_loss, test_accuracy

In [44]:
from tqdm import tqdm
EPOCHS = 3
for epoch in range(1, EPOCHS+1):
    train_bar = tqdm(
        tr_ds_loader,
        desc=f'[Epochs: {epoch}]',
        leave=True
        )
    train(model, train_bar, optimizer)
    scheduler.step()
    test_loss, test_accuracy = evaluate(model, tt_ds_loader)

    tqdm.write(
        f'test_loss: {test_loss:.4f}, '
        f'test_accuracy: {test_accuracy * 100:.2f}%, '
        f'lr_scheduler{scheduler.get_last_lr()}'
        '\n'
    )

[Epochs: 1]: 100%|██████████| 938/938 [00:16<00:00, 56.57it/s, train_loss=0.4908, train_accuracy=82.39%]


test_loss: 0.3798, test_accuracy: 86.09%, lr_scheduler[0.001]



[Epochs: 2]: 100%|██████████| 938/938 [00:17<00:00, 52.30it/s, train_loss=0.4887, train_accuracy=82.65%]


test_loss: 0.3796, test_accuracy: 86.20%, lr_scheduler[0.001]



[Epochs: 3]: 100%|██████████| 938/938 [00:16<00:00, 57.31it/s, train_loss=0.4857, train_accuracy=82.75%]


test_loss: 0.3768, test_accuracy: 86.23%, lr_scheduler[0.0001]



In [46]:
ck_tr = transforms.Compose([transforms.ToTensor()])

tr_ds = datasets.CIFAR10(root='../data/',
                 train=True,
                 download=True,
                 transform=ck_tr)

tr_ds_loader = torch.utils.data.DataLoader(tr_ds,
                                           batch_size=50000,
                                           shuffle=False)

In [47]:
ck_data = iter(tr_ds_loader)
data, _ = next(ck_data)
data.shape

torch.Size([50000, 3, 32, 32])

In [48]:
print(data[:, 0].mean(), data[:, 0].std())
print(data[:, 1].mean(), data[:, 1].std())
print(data[:, 2].mean(), data[:, 2].std())

tensor(0.4914) tensor(0.2470)
tensor(0.4822) tensor(0.2435)
tensor(0.4465) tensor(0.2616)


In [None]:
# 위의 코드를 한 번에!
data.mean(dim=[0, 2, 3]), data.std(dim=[0, 2, 3]) # 0,2,3번 차원 전체를 뭉텅이로 축소하고 1번 차원(channel)만 남겨 계산

(tensor([0.4914, 0.4822, 0.4465]), tensor([0.2470, 0.2435, 0.2616]))

In [50]:
# 상수 설정
BATCH_SIZE = 64
EPOCHS = 10

In [51]:
# 데이터 수정 (노이즈 삽입)
# 1. 데이터 준비
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))
])

tr_ds_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10(
        root='../data/',
        train=True,
        download=False,
        transform=transform
    ),
    batch_size=BATCH_SIZE,
    shuffle=True
)

tt_ds_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10(
        root='../data/',
        train=False,
        download=False,
        transform=transform
    ),
    batch_size=BATCH_SIZE,
    shuffle=False
)

# ResNet

In [None]:
# 2. 모델 설계
class BasicBlock(nn.Module):
    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)

        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False), 
                nn.BatchNorm2d(planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out
    
class ResNet(nn.Module):
    def __init__(self, class_n):
        super().__init__()
        self.in_planes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.l1 = self._make_l(16, 2, 1)
        self.l2 = self._make_l(32, 2, 1)
        self.l3 = self._make_l(64, 2, 1)
        self.out_l = nn.Linear(1024, class_n)

    def _make_l(self, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks-1)
        l = []
        for stride in strides:
            l.append(BasicBlock(self.in_planes, planes, stride))
            self.in_planes = planes
        return nn.Sequential(*l)
    
    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.l1(x)
        x = self.l2(x)
        x = self.l3(x)

        # 분류기 층
        x = F.avg_pool2d(x, 8)
        x = x.view(x.size(0), -1)
        out = self.out_l(x)
        return out

In [64]:
model = ResNet(10).to(DEVICE)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0005)
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=3, gamma=0.1)

In [65]:
model

ResNet(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (l1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True

In [66]:
def train(model, tr_ds_loader, optimizer):
    model.train()
    running_loss = 0.
    total = 0
    correct = 0
    for (x, y) in tr_ds_loader:
        data, target = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        
        total += len(data)
        running_loss += loss.item() * len(data)
        pred = (output.argmax(dim=1) == target)
        correct += pred.sum().item()

        train_loss = running_loss / total
        train_accuracy = correct / total

        tr_ds_loader.set_postfix({
            'train_loss': f'{train_loss:.4f}',
            'train_accuracy': f'{train_accuracy*100:.2f}%'
        })

In [67]:
@torch.no_grad()
def evaluate(model, tt_ds_loader):
    model.eval()
    test_loss = 0.
    correct = 0
    for x, y in tt_ds_loader:
        data, target = x.to(DEVICE), y.to(DEVICE)
        output = model(data)
        loss = F.cross_entropy(output, target).item()
        test_loss += loss * len(data)
        pred = (output.argmax(dim=1) == target)
        correct += pred.sum().item()
    test_loss /= len(tt_ds_loader.dataset)
    test_accuracy = correct / len(tt_ds_loader.dataset)
    return test_loss, test_accuracy

In [None]:
from tqdm import tqdm
EPOCHS = 20
for epoch in range(1, EPOCHS+1):
    train_bar = tqdm(
        tr_ds_loader,
        desc=f'[Epochs: {epoch}/{EPOCHS}]',
        leave=True
    )
    train(model, train_bar, optimizer)
    scheduler.step()
    test_loss, test_accuracy = evaluate(model, tt_ds_loader)
    tqdm.write(
        f'test_loss: {test_loss:.4f}, '
        f'test_accuracy: {test_accuracy * 100:.2f}%, '
        f'lr: {scheduler.get_last_lr()}'
    )

[Epochs: 1/20]: 100%|██████████| 782/782 [00:44<00:00, 17.51it/s, train_loss=2.1054, train_accuracy=24.59%]


test_loss: 1.8224, test_accuracy: 35.56%, lr: [0.1]


[Epochs: 2/20]: 100%|██████████| 782/782 [00:42<00:00, 18.47it/s, train_loss=1.6438, train_accuracy=40.32%]


test_loss: 1.6426, test_accuracy: 41.25%, lr: [0.1]


[Epochs: 3/20]: 100%|██████████| 782/782 [00:42<00:00, 18.49it/s, train_loss=1.4402, train_accuracy=48.71%]


test_loss: 1.4143, test_accuracy: 47.68%, lr: [0.010000000000000002]


[Epochs: 4/20]: 100%|██████████| 782/782 [00:42<00:00, 18.58it/s, train_loss=1.0878, train_accuracy=61.84%]


test_loss: 1.0760, test_accuracy: 61.89%, lr: [0.010000000000000002]


[Epochs: 5/20]: 100%|██████████| 782/782 [00:42<00:00, 18.41it/s, train_loss=1.0094, train_accuracy=64.31%]


test_loss: 0.9864, test_accuracy: 65.11%, lr: [0.010000000000000002]


[Epochs: 6/20]: 100%|██████████| 782/782 [00:42<00:00, 18.39it/s, train_loss=0.9514, train_accuracy=66.54%]


test_loss: 0.9516, test_accuracy: 66.73%, lr: [0.0010000000000000002]


[Epochs: 7/20]: 100%|██████████| 782/782 [00:42<00:00, 18.37it/s, train_loss=0.8668, train_accuracy=69.73%]


test_loss: 0.8774, test_accuracy: 69.00%, lr: [0.0010000000000000002]


[Epochs: 8/20]: 100%|██████████| 782/782 [00:42<00:00, 18.33it/s, train_loss=0.8508, train_accuracy=70.39%]


test_loss: 0.8675, test_accuracy: 69.33%, lr: [0.0010000000000000002]


[Epochs: 9/20]: 100%|██████████| 782/782 [00:42<00:00, 18.58it/s, train_loss=0.8394, train_accuracy=70.80%]


test_loss: 0.8608, test_accuracy: 69.80%, lr: [0.00010000000000000003]


[Epochs: 10/20]: 100%|██████████| 782/782 [00:42<00:00, 18.59it/s, train_loss=0.8258, train_accuracy=71.24%]


test_loss: 0.8525, test_accuracy: 69.89%, lr: [0.00010000000000000003]


[Epochs: 11/20]: 100%|██████████| 782/782 [00:42<00:00, 18.61it/s, train_loss=0.8221, train_accuracy=71.36%]


test_loss: 0.8513, test_accuracy: 69.90%, lr: [0.00010000000000000003]


[Epochs: 12/20]: 100%|██████████| 782/782 [00:42<00:00, 18.34it/s, train_loss=0.8208, train_accuracy=71.34%]


test_loss: 0.8509, test_accuracy: 70.02%, lr: [1.0000000000000004e-05]


[Epochs: 13/20]: 100%|██████████| 782/782 [00:42<00:00, 18.47it/s, train_loss=0.8209, train_accuracy=71.19%]


test_loss: 0.8507, test_accuracy: 69.95%, lr: [1.0000000000000004e-05]


[Epochs: 14/20]: 100%|██████████| 782/782 [00:42<00:00, 18.34it/s, train_loss=0.8200, train_accuracy=71.49%]


test_loss: 0.8513, test_accuracy: 69.94%, lr: [1.0000000000000004e-05]


[Epochs: 15/20]: 100%|██████████| 782/782 [00:42<00:00, 18.26it/s, train_loss=0.8191, train_accuracy=71.35%]


test_loss: 0.8509, test_accuracy: 69.99%, lr: [1.0000000000000004e-06]


[Epochs: 16/20]:  95%|█████████▌| 744/782 [00:40<00:02, 18.72it/s, train_loss=0.8198, train_accuracy=71.48%]

In [74]:
import torch, os, psutil

print("시스템 전체 논리 코어 수:", os.cpu_count())
print("시스템 전체 물리 코어 수:", psutil.cpu_count(logical=False))
print("PyTorch로 설정한 스레드 수:", torch.get_num_threads())

시스템 전체 논리 코어 수: 8
시스템 전체 물리 코어 수: 4
PyTorch로 설정한 스레드 수: 100


In [75]:
torch.set_num_threads(12)
print("PyTorch로 설정한 스레드 수:", torch.get_num_threads()) # 실제 8개가 최대라서 12개로 하면 오버헤드 걸림.

PyTorch로 설정한 스레드 수: 12


In [76]:
torch.set_num_threads(100)
print("PyTorch로 설정한 스레드 수:", torch.get_num_threads()) # 실제 8개가 최대라서 100개로 하면 오버헤드 걸림.

PyTorch로 설정한 스레드 수: 100
