In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import Dataset
from torch.utils.data import DataLoader,Subset
from torch.utils.data import random_split
import torchvision.transforms as transforms

import torchvision

import numpy as np

In [None]:
# ResNet
"""
  skip-connection :
    y = f(x) + x
  
  Role of the 1x1 convolution :
    reduces the number of channels
    e.g. ) 28 * 28 * 192  conv  1 * 1 * 16
      => the data can be reduced to 28 * 28 * 16

"""
class ResidualNet(nn.Module):
  """Small residual CNN classifier for single-channel 28x28 images.

  A 3x3 convolution lifts the input to 8 channels. One bottleneck block
  (1x1 reduce -> 3x3 -> 1x1 expand) is then added back onto its own input
  (y = f(x) + x), and the flattened sum is fed to a linear classifier.

  Args:
    output_dim: number of classes, i.e. the width of the logits returned by
      ``forward``.
  """

  def __init__(self, output_dim):
    super(ResidualNet, self).__init__()

    self.n_classes = output_dim
    # (N, 1, 28, 28) -> (N, 8, 28, 28); padding=1 preserves the spatial size.
    self.conv1 = nn.Conv2d(1, 8, kernel_size = 3, stride = 1, padding = 1)
    # Bottleneck: (N, 8, 28, 28) -> (N, 4, 28, 28) -> (N, 4, 28, 28) -> (N, 8, 28, 28).
    # The output channel count matches the input so the skip addition is valid.
    self.block = nn.Sequential(
        nn.Conv2d(8, 4, kernel_size = 1, stride = 1),
        nn.ReLU(),
        nn.Conv2d(4, 4, kernel_size = 3, stride = 1, padding = 1),
        nn.ReLU(),
        nn.Conv2d(4, 8, kernel_size = 1, stride = 1),
    )

    # Kept as an attribute for callers that want probabilities
    # (``model.softmax(logits)``). forward() deliberately does not apply it.
    self.softmax = nn.Softmax(dim = -1)
    self.fc = nn.Linear(8 * 28 * 28, self.n_classes)

  def forward(self, x):
    """Compute class logits.

    Args:
      x: input batch of shape (N, 1, 28, 28).

    Returns:
      Tensor of shape (N, output_dim) holding unnormalized scores (logits),
      which is the input nn.CrossEntropyLoss expects.
    """
    x = self.conv1(x)
    identity = x
    # Skip connection: add the block's input back onto its output.
    out = self.block(x) + identity
    # Flatten to (N, 8 * 28 * 28) for the linear classifier.
    out = out.view(out.size(0), -1)
    # No softmax here: normalizing the flattened features before the linear
    # layer would force every row to sum to 1 and erase most of the signal,
    # and the loss applies log-softmax to the logits itself.
    out = self.fc(out)
    return out

In [None]:
# Fetch QMNIST into the current directory (downloading when necessary);
# ToTensor converts every image to a tensor.
mnist_dl = torchvision.datasets.QMNIST(
    './',
    download = True,
    transform = transforms.ToTensor(),
)

# Keep every second index in [0, 6000), i.e. a 3000-sample subset.
idx = np.arange(0, 6000, 2)
mnist = Subset(mnist_dl, idx)

# NOTE(review): this loader wraps the full dataset (mnist_dl), not the `mnist`
# subset, and nothing in the visible cells reads it. Confirm it is still needed.
data_loader = DataLoader(mnist_dl, batch_size = 64, shuffle = True)

In [None]:
# Partition the subsampled dataset into train / validation / test subsets
# (80% / 10% / remainder).
dataset_size = len(mnist)
train_size = int(dataset_size * 0.8)
valid_size = int(dataset_size * 0.1)
# The test split takes whatever is left, so the three sizes always sum to
# dataset_size even when the percentages do not divide evenly.
test_size = dataset_size - train_size - valid_size

# A seeded generator makes the partition identical on every run. Without it,
# each kernel restart would move samples between the train and test sets.
split_generator = torch.Generator().manual_seed(42)
# Despite the `_dl` suffix these are dataset subsets, not DataLoaders.
train_dl, valid_dl, test_dl = random_split(
    mnist,
    [train_size, valid_size, test_size],
    generator = split_generator,
)

print(f'train data size : {len(train_dl)}')
print(f'valid data size : {len(valid_dl)}')
print(f'test data size : {len(test_dl)}')

train data size : 2400
valid data size : 300
test data size : 300


In [None]:
# Training batches are reshuffled every epoch; drop_last keeps every training
# batch at exactly 16 samples.
train_loader = DataLoader(train_dl, batch_size = 16, shuffle = True, drop_last = True)
# Evaluation loaders must visit every sample exactly once. With drop_last=True
# the trailing samples that do not fill a batch were skipped, and with
# shuffle=True a different set was skipped each run. The final batch may
# therefore be smaller than 16.
valid_loader = DataLoader(valid_dl, batch_size = 16, shuffle = False, drop_last = False)
test_loader = DataLoader(test_dl, batch_size = 16, shuffle = False, drop_last = False)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# Build the 10-output network and move it to the selected device before the
# optimizer is created from its parameters.
model = ResidualNet(output_dim = 10)
model = model.to(device)

# Cross-entropy over the model outputs, optimized with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params = model.parameters(), lr = 1e-3)

In [None]:
import torch.nn.functional as F  # NOTE(review): not referenced in this cell

# ---- Training: 30 epochs, reporting the mean batch loss every 10th epoch ----
# This cell ends by switching the model to eval mode, so set train mode
# explicitly in case the cell is run again.
model.train()
for epoch in range(30):
  cost = 0.0
  for x, y in train_loader:
    x = x.to(device)
    y = y.to(device)

    pred = model(x)
    loss = criterion(pred, y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # .item() yields a plain Python float. Summing the loss tensor itself
    # would keep autograd history attached to the running total and make the
    # print below show a tensor repr instead of a number.
    cost += loss.item()

  # Mean of the per-batch losses for this epoch.
  cost = cost / len(train_loader)

  if epoch % 10 == 0:
    print(f"Epoch {epoch} : {cost}")

# ---- Validation accuracy ----
model.eval()
correct = 0
total = 0
with torch.no_grad():
  for x, y in valid_loader:
    x = x.to(device)
    y = y.to(device)

    pred = model(x)
    # Boolean mask: True where the highest-scoring class equals the label.
    prediction = torch.argmax(pred, 1) == y

    # Count samples rather than averaging per-batch means, so the result is
    # exact even when batches differ in size.
    correct += prediction.sum().item()
    total += y.size(0)

# Fraction of correctly classified validation samples.
cost = correct / total
print(f"Acc : {cost * 100}")



Epoch 0 : 0.017216073349118233
Epoch 10 : 0.007503797300159931
Epoch 20 : 0.003453581128269434
Acc : 100.0
