In [155]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
import torchvision
from torch import Tensor
from typing import Any, Callable, List, Optional, Type, Union


In [156]:
# 공개 데이터셋에서 학습 데이터를 내려받습니다.
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

# 공개 데이터셋에서 테스트 데이터를 내려받습니다.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

In [157]:
batch_size = 64

# 데이터로더를 생성합니다.
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break
print(test_dataloader.dataset.classes)

Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64
['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']


In [158]:
# 학습에 사용할 CPU나 GPU, MPS 장치를 얻습니다.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")


Using cuda device


In [159]:

torch.cuda.empty_cache()


In [160]:
class Block(nn.Module):
    def __init__(self, inputs, outputs, _Downsample = False):
        super().__init__()
        self._Downsample = _Downsample
        self.downsampling = nn.Sequential(
            # In paper, 
            # downsampling is performed by conv3_1, conv4_1, and conv5_1 with a stride of 2
            #nn.Conv1d(inputs, outputs, kernel_size=1, stride=1)
            nn.Conv2d(inputs, outputs, kernel_size=1, stride=1),
            #nn.MaxPool2d(kernel_size=1, stride=2)
        )
        self.conv1 = nn.Conv2d(inputs, outputs, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(outputs)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(outputs, outputs, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(outputs)
        self.relu = nn.ReLU(inplace=True)
    
    def forward(self, x):
        print('x1(identity) :', x.shape, 'Downsample :', self._Downsample)
        identity = x
        if self._Downsample == True:
            identity = self.downsampling(identity)
            print('x1(downsampled) :', identity.shape)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        print('x2 :', x.shape)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        print('x3 :', x.shape)
        x += identity
        print('x4 :', x.shape)
        return x
    
class MyResNet34(nn.Module):
    def __init__(self, _BlockClass = Block):
        super().__init__()
        
        # 1. input layer
        # input : 224x224x3
        # output : 112x112x64
        ### padding 3, 1 각각 왜있는지 모르겠음. 일단 추천하는대로 넣으니 shape 맞아떨어짐.
        self.conv1 = nn.Conv2d(in_channels = 3, out_channels= 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        # 2. 64ch 3x3 conv x 3
        # input : 112x112x64
        # output : 56x56x64
        self.conv64blocks = nn.Sequential(
            _BlockClass(64, 64),
            _BlockClass(64, 64),
            _BlockClass(64, 64)
        )
        # 3. 128ch 3x3 conv x 4
        # input : 56x56x64
        # output : 28x28x128
        self.conv128blocks = nn.Sequential(
            _BlockClass(64, 128, _Downsample = True),
            _BlockClass(128, 128),
            _BlockClass(128, 128),
            _BlockClass(128, 128)
        )
        # 4. 256ch 3x3 conv x 6
        # input : 28x28x128
        # output : 14x14x256
        self.conv256blocks = nn.Sequential(
            _BlockClass(128, 256, _Downsample = True),
            _BlockClass(256, 256),
            _BlockClass(256, 256),
            _BlockClass(256, 256),
            _BlockClass(256, 256),
            _BlockClass(256, 256)
        )
        # 5. 512ch 3x3 conv x 3
        # input : 14x14x256
        # output : 7x7x512
        self.conv512blocks = nn.Sequential(
            _BlockClass(256, 512, _Downsample = True),
            _BlockClass(512, 512),
            _BlockClass(512, 512)
        )
        
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(in_features = 512, out_features= 1000)
        self.softmax = nn.Softmax(dim=1)
        
    
    def forward(self, x):
        x = self.conv1(x)
        print('conv7x7 64ch :', x.shape)
        x = self.bn1(x)
        print('conv7x7 64ch :', x.shape)
        x = self.relu(x)
        print('conv7x7 64ch :', x.shape)
        x = self.maxpool(x)
        print('1st :', x.shape)
        x = self.conv64blocks(x)
        x = self.maxpool(x)
        print('2nd :', x.shape)
        x = self.conv128blocks(x)
        x = self.maxpool(x)
        print('3rd :', x.shape)
        x = self.conv256blocks(x)
        x = self.maxpool(x)
        print('4th :', x.shape)
        x = self.conv512blocks(x)
        print('5th :', x.shape)
        
        x = self.avgpool(x)
        print('avgpool :', x.shape)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        x = self.softmax(x)
        
        return x
        

    

In [161]:

model = MyResNet34(Block).to(device)
print(model)

MyResNet34(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (conv64blocks): Sequential(
    (0): Block(
      (downsampling): Sequential(
        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
      )
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): Block(
      (downsampling): Sequential(
        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
      )
      (conv1): Conv2d(64, 64, kernel_size=(3, 3

In [162]:
print(torch.cuda.is_available())

True


In [163]:
torch.cuda.empty_cache()

In [164]:

tensor = torch.randn(64, 3, 224, 224)
print(tensor.shape)
out = model(tensor.to(device))

torch.Size([64, 3, 224, 224])
conv7x7 64ch : torch.Size([64, 64, 112, 112])
conv7x7 64ch : torch.Size([64, 64, 112, 112])
conv7x7 64ch : torch.Size([64, 64, 112, 112])
1st : torch.Size([64, 64, 56, 56])
x1(identity) : torch.Size([64, 64, 56, 56]) Downsample : False
x2 : torch.Size([64, 64, 56, 56])
x3 : torch.Size([64, 64, 56, 56])
x4 : torch.Size([64, 64, 56, 56])
x1(identity) : torch.Size([64, 64, 56, 56]) Downsample : False
x2 : torch.Size([64, 64, 56, 56])
x3 : torch.Size([64, 64, 56, 56])
x4 : torch.Size([64, 64, 56, 56])
x1(identity) : torch.Size([64, 64, 56, 56]) Downsample : False
x2 : torch.Size([64, 64, 56, 56])
x3 : torch.Size([64, 64, 56, 56])
x4 : torch.Size([64, 64, 56, 56])
2nd : torch.Size([64, 64, 28, 28])
x1(identity) : torch.Size([64, 64, 28, 28]) Downsample : True
x1(downsampled) : torch.Size([64, 128, 28, 28])
x2 : torch.Size([64, 128, 28, 28])
x3 : torch.Size([64, 128, 28, 28])
x4 : torch.Size([64, 128, 28, 28])
x1(identity) : torch.Size([64, 128, 28, 28]) Downsam

In [165]:
print(out.shape)
print(torch.argmax(out, dim=1))

torch.Size([64, 1000])
tensor([130, 193, 130, 193, 193, 130, 839, 130, 193, 839, 130, 130, 130, 193,
        130, 193, 130, 193, 193, 193, 130, 193, 130, 193, 193, 130, 130, 193,
        130, 130, 130, 193, 130, 193, 130, 193, 130, 193, 193, 193, 130, 193,
        193, 130, 193, 130, 193, 130, 130, 130, 193, 130, 193, 193, 193, 193,
        193, 130, 193, 193, 839, 130, 130, 193], device='cuda:0')


In [166]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [167]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # 예측 오류 계산
        pred = model(X)
        loss = loss_fn(pred, y)

        # 역전파
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [168]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [169]:
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------


RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[64, 1, 28, 28] to have 3 channels, but got 1 channels instead

In [None]:
torch.save(model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")

In [None]:
model = NeuralNetwork().to(device)
model.load_state_dict(torch.load("model.pth"))

In [None]:
classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

model.eval()
x, y = test_data[0][0], test_data[0][1]
with torch.no_grad():
    x = x.to(device)
    pred = model(x)
    predicted, actual = classes[pred[0].argmax(0)], classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')