<a href="https://colab.research.google.com/github/hyeong8465/paper/blob/main/ResNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F


from torch.utils.data import Dataset
from torchvision import datasets

from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt

In [2]:
training_data = datasets.CIFAR100(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

test_data = datasets.CIFAR100(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to data/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:05<00:00, 29642078.07it/s]


Extracting data/cifar-100-python.tar.gz to data
Files already downloaded and verified


In [64]:
train_dataloader = DataLoader(training_data, batch_size=256, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=256, shuffle=True)

In [65]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cuda device


In [66]:
class ResNet(nn.Module):
    def __init__(self):
        super().__init__()
        # 32x32 데이터에 7x7는 좀 큰 것 같아서 3x3로 줄이고 s:1, p:1로 사이즈를 줄이지 않음
        self.block0 = nn.Sequential(
            nn.Conv2d(3, 64, 3, stride = 1, padding = 1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(3, stride = 2, padding = 1)
        ) # 16x16

        self.block64 = nn.Sequential(
            nn.Conv2d(64, 64, 3, stride = 1, padding = 1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, 3, stride = 1, padding = 1),
            nn.BatchNorm2d(64)
        ) # 16x16

        self.block128_d = nn.Sequential(
            nn.Conv2d(64, 128, 3, stride = 2, padding = 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, 3, stride = 1, padding = 1),
            nn.BatchNorm2d(128)
        ) # 8x8
        self.block128 = nn.Sequential(
            nn.Conv2d(128, 128, 3, stride = 1, padding = 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, 3, stride = 1, padding = 1),
            nn.BatchNorm2d(128)
        ) # 8x8

        self.block256_d = nn.Sequential(
            nn.Conv2d(128, 256, 3, stride = 2, padding = 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, 256, 3, stride = 1, padding = 1),
            nn.BatchNorm2d(256)
        ) # 4x4
        self.block256 = nn.Sequential(
            nn.Conv2d(256, 256, 3, stride = 1, padding = 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, 256, 3, stride = 1, padding = 1),
            nn.BatchNorm2d(256)
        ) # 4x4

        self.block512_d = nn.Sequential(
            nn.Conv2d(256, 512, 3, stride = 2, padding = 1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, stride = 1, padding = 1),
            nn.BatchNorm2d(512)
        ) # 2x2
        self.block512 = nn.Sequential(
            nn.Conv2d(512, 512, 3, stride = 1, padding = 1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, stride = 1, padding = 1),
            nn.BatchNorm2d(512)
        ) # 2x2

        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(512, 100)

    def forward(self, x):
        x = self.block0(x)

        res = x
        x = self.block64(x)
        x += res
        x = F.relu(x)
        res = x
        x = self.block64(x)
        x += res
        x = F.relu(x)
        res = x
        x = self.block64(x)
        x += res
        x = F.relu(x)

        res = F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, 32, 32), "constant", 0)
        x = self.block128_d(x)
        x += res
        x = F.relu(x)
        res = x
        x = self.block128(x)
        x += res
        x = F.relu(x)
        res = x
        x = self.block128(x)
        x += res
        x = F.relu(x)
        res = x
        x = self.block128(x)
        x += res
        x = F.relu(x)

        res = F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, 64, 64), "constant", 0)
        x = self.block256_d(x)
        x += res
        x = F.relu(x)
        res = x
        x = self.block256(x)
        x += res
        x = F.relu(x)
        res = x
        x = self.block256(x)
        x += res
        x = F.relu(x)
        res = x
        x = self.block256(x)
        x += res
        x = F.relu(x)
        res = x
        x = self.block256(x)
        x += res
        x = F.relu(x)
        res = x
        x = self.block256(x)
        x += res
        x = F.relu(x)

        res = F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, 128, 128), "constant", 0)
        x = self.block512_d(x)
        x += res
        x = F.relu(x)
        res = x
        x = self.block512(x)
        x += res
        x = F.relu(x)
        res = x
        x = self.block512(x)
        x += res
        x = F.relu(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

In [67]:
model = ResNet().to(device)
print(model)

ResNet(
  (block0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (block64): Sequential(
    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (block128_d): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(128, eps=1e-05, mome

In [69]:
learning_rate = 0.1
batch_size = 256
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum = 0.9, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, mode='min', factor=0.1)


In [70]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        # 예측(prediction)과 손실(loss) 계산
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)

        # 역전파
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * batch_size + len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # torch.no_grad()를 사용하여 테스트 시 변화도(gradient)를 계산하지 않도록
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    scheduler.step(test_loss)

In [71]:
epochs = 100
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 5.459112  [  256/50000]
loss: 4.445406  [25856/50000]
Test Error: 
 Accuracy: 3.8%, Avg loss: 4.263344 

Epoch 2
-------------------------------
loss: 4.183405  [  256/50000]
loss: 3.807117  [25856/50000]
Test Error: 
 Accuracy: 10.3%, Avg loss: 3.818148 

Epoch 3
-------------------------------
loss: 3.647131  [  256/50000]
loss: 3.426599  [25856/50000]
Test Error: 
 Accuracy: 11.6%, Avg loss: 3.855020 

Epoch 4
-------------------------------
loss: 3.013697  [  256/50000]
loss: 3.134787  [25856/50000]
Test Error: 
 Accuracy: 15.2%, Avg loss: 3.588246 

Epoch 5
-------------------------------
loss: 3.014247  [  256/50000]
loss: 3.006446  [25856/50000]
Test Error: 
 Accuracy: 23.9%, Avg loss: 3.092135 

Epoch 6
-------------------------------
loss: 2.570596  [  256/50000]
loss: 2.736684  [25856/50000]
Test Error: 
 Accuracy: 22.4%, Avg loss: 3.248339 

Epoch 7
-------------------------------
loss: 2.299149  [  256/50000]
loss: 2.396899  [25

In [None]:
# 정확도랑 loss가 꼭 반비례하진 않음
# 왜..? 성능이...?

# 스케줄러 적용 전 결과

In [59]:
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 5.644894  [  256/50000]
loss: 4.567046  [25856/50000]
Test Error: 
 Accuracy: 2.4%, Avg loss: 4.563791 

Epoch 2
-------------------------------
loss: 4.552215  [  256/50000]
loss: 4.233867  [25856/50000]
Test Error: 
 Accuracy: 5.6%, Avg loss: 4.233245 

Epoch 3
-------------------------------
loss: 4.031446  [  256/50000]
loss: 3.785212  [25856/50000]
Test Error: 
 Accuracy: 8.8%, Avg loss: 3.918862 

Epoch 4
-------------------------------
loss: 3.556371  [  256/50000]
loss: 3.585092  [25856/50000]
Test Error: 
 Accuracy: 15.2%, Avg loss: 3.550575 

Epoch 5
-------------------------------
loss: 3.324433  [  256/50000]
loss: 3.323803  [25856/50000]
Test Error: 
 Accuracy: 18.8%, Avg loss: 3.379224 

Epoch 6
-------------------------------
loss: 3.059703  [  256/50000]
loss: 2.908569  [25856/50000]
Test Error: 
 Accuracy: 19.4%, Avg loss: 3.477236 

Epoch 7
-------------------------------
loss: 2.601398  [  256/50000]
loss: 2.481053  [2585

In [60]:
# 확인을 위해 10번 돌린 모델 저장
torch.save(model.state_dict(), 'resnet34_cifar100.pth')

In [62]:
# 10번 학습한 모델 불러와서 100번 추가 학습
model.load_state_dict(torch.load('/content/resnet34_cifar100.pth'))
epochs = 100
for t in range(epochs):
    print(f"Epoch {t+11}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 11
-------------------------------
loss: 1.876707  [  256/50000]
loss: 1.743318  [25856/50000]
Test Error: 
 Accuracy: 34.0%, Avg loss: 2.819623 

Epoch 12
-------------------------------
loss: 1.610768  [  256/50000]
loss: 1.759703  [25856/50000]
Test Error: 
 Accuracy: 34.6%, Avg loss: 2.658539 

Epoch 13
-------------------------------
loss: 1.618271  [  256/50000]
loss: 1.639172  [25856/50000]
Test Error: 
 Accuracy: 37.1%, Avg loss: 2.544223 

Epoch 14
-------------------------------
loss: 1.218790  [  256/50000]
loss: 1.434734  [25856/50000]
Test Error: 
 Accuracy: 19.3%, Avg loss: 3.364786 

Epoch 15
-------------------------------
loss: 3.086596  [  256/50000]
loss: 2.240257  [25856/50000]
Test Error: 
 Accuracy: 36.7%, Avg loss: 2.511128 

Epoch 16
-------------------------------
loss: 1.868688  [  256/50000]
loss: 1.761163  [25856/50000]
Test Error: 
 Accuracy: 39.1%, Avg loss: 2.418676 

Epoch 17
-------------------------------
loss: 1.336061  [  256/50000]
loss: 1.473

KeyboardInterrupt: 