# 환경설정

In [None]:
pip install torchviz

Collecting torchviz
  Downloading torchviz-0.0.2.tar.gz (4.9 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: torchviz
  Building wheel for torchviz (setup.py) ... [?25l[?25hdone
  Created wheel for torchviz: filename=torchviz-0.0.2-py3-none-any.whl size=4131 sha256=0018570be02681ec555490f21ce907891296fa59b8244d2e796973c6455c4949
  Stored in directory: /root/.cache/pip/wheels/4c/97/88/a02973217949e0db0c9f4346d154085f4725f99c4f15a87094
Successfully built torchviz
Installing collected packages: torchviz
Successfully installed torchviz-0.0.2


In [None]:
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
import pickle
from torchviz import make_dot
import torch.utils.data as data
import torchvision as tv
from torch.utils.data.dataloader import DataLoader

In [None]:
import time
def timer(func):
  def wrapper(*args, **kwargs):
    start_time = time.time()
    result = func(*args, **kwargs)
    end_time = time.time()
    computation_time = end_time - start_time
    print(f"excution time {func.__name__}: {computation_time} seconds")

    return result
  return wrapper

In [11]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# 과제1)
* mnist 분류문제에서 test accuracy를 계산하는 코드 구현. 이때 checkpoint생성하기!

### 데이터 로드

In [None]:
# torchvision에서 데이터 세트 읽기
train_dataset = tv.datasets.MNIST(".", download=True, train=True, transform = tv.transforms.ToTensor())
test_dataset = tv.datasets.MNIST(".", download=True, train=False, transform = tv.transforms.ToTensor())
# batch = 64개씩
min_batch = 64
# dataloader로 데이터셋 구조화
train_loader = DataLoader(dataset = train_dataset, batch_size=min_batch, shuffle=True)
test_loader = DataLoader(dataset = test_dataset, batch_size=min_batch, shuffle=False)

### Model 셋팅

In [7]:
class mnist_classification(nn.Module):
  def __init__(self, hidden_variables=[128,64,10], input_output_dim = (28*28, 10)):
    super().__init__()
    self.input_variable_dim = input_output_dim[0]
    self.output_variable_dim = input_output_dim[1]
    self.list_hidden_variable = hidden_variables
    self.layer = nn.Sequential() # nn.sequential : 신경망 순차적으로 관리할 때 사용
    variable_dim = self.input_variable_dim

    for i, hidden_variable in enumerate(self.list_hidden_variable):
      self.layer.add_module('layer_'+str(i), nn.Linear(variable_dim, hidden_variable))
      self.layer.add_module('activation_'+str(i), nn.ReLU()) # nn.ReLU() : Rectified Linear Unit, max(0, x)
      variable_dim = hidden_variable # (다음 hidden layer step입장에서) 앞선 layer의 dimension 정보를 가지고있는 변수
    self.layer.add_module('final_layer', nn.Linear(variable_dim, self.output_variable_dim)) # 마지막 layer에서는 출력 dimension에 맞게

  def forward(self, x):
    x = x.view(-1, 28*28) # 텐서에서의 view: 넘파이에서의 리쉐이프(Reshape)와 같은 역할.
                          # -1의 역할 : 텐서를 (?, 28*28)의 크기로 변경
    y_hat = self.layer(x)

    return y_hat

### 학습 스텝 코드 (p.56-2)

In [9]:
def make_train_step(model, loss_fn, optimizer):
  def train_step_fn(x,y):

    model.train()
    # model.train() : 모델이 훈련 데이터에 대해 학습을 진행하고 그래디언트를 계산하여 파라미터를 업데이트할 수 있도록 준비
    # model.eval() : 모델이 테스트 데이터에 대해 추론을 수행하고 그래디언트를 계산하지 않도록 설정
    y_hat = model(x)
    loss = loss_fn(y_hat, y)
    loss.backward() # loss.backward : 오차(error)를 역전파
    optimizer.step()
    optimizer.zero_grad()

    return loss.item()
  return train_step_fn

### model, loss_fn, opti fn 셋팅 (p.56-1)

In [24]:
lr = 0.1
epochs = 20 # 1000시간너무오래걸림
model = mnist_classification().to(device)
optimizer = optim.SGD(model.parameters(), lr=lr)
loss_fn = nn.CrossEntropyLoss()  # classification 에서 사용 (https://nuguziii.github.io/dev/dev-002/)
train_step = make_train_step(model, loss_fn, optimizer)

### 학습 management 코드

In [27]:
@timer
def mnist_train_model(epochs=epochs):

  train_losses = []
  for epoch in range(epochs):
    mini_batch_losses =[]
    for x_minibatch, y_minibatch in train_loader:
      x_minibatch = x_minibatch.to(device)
      y_minibatch = y_minibatch.to(device)

      mini_batch_loss = train_step(x_minibatch, y_minibatch)
      mini_batch_losses.append(mini_batch_loss)

    # evaluate train loss
    if (epoch+1) % 5 == 0:
      loss = np.mean(mini_batch_losses)
      print("train loss at {} epoch: {}".format(epoch+1, loss))
      train_losses.append(loss)

  # evaluate validation loss
  with torch.no_grad():
    # no_grad() with statement에 포함시키게 되면 Pytorch는 autograd engine을 꺼버림
    # 즉, 이 말은 더 이상 자동으로 gradient를 트래킹하지 않는다
    # https://coffeedjimmy.github.io/pytorch/2019/11/05/pytorch_nograd_vs_train_eval/

    test_accuracy = 0
    test_result = 0
    test_cnt = 0

    for x_minibatch_test, y_minibatch_test in test_loader:
      x_minibatch_test = x_minibatch_test.to(device)
      y_minibatch_test = y_minibatch_test.to(device)

      y_hat_val = model(x_minibatch_test)
      y_true_cnt = torch.sum(torch.argmax(y_hat_val, axis=1)==y_minibatch_test)
      y_cnt = len(y_hat_val)

      test_result += y_true_cnt
      test_cnt += y_cnt

    test_accuracy = 100 * test_result / test_cnt

    print("test accuracy : {}%".format(test_accuracy))

  return train_losses, test_accuracy


In [28]:
train_loss, test_accuracy = mnist_train_model(epochs=epochs)

train loss at 5 epoch: 0.00085104792637341
train loss at 10 epoch: 0.00028793921242363654
train loss at 15 epoch: 0.00018592344468304015
train loss at 20 epoch: 0.00014057266478970814
test accuracy : 98.0199966430664%
excution time mnist_train_model: 223.06588697433472 seconds


In [32]:
checkpoint = {'epochs':epochs,
              'model_state_dict':model.state_dict(),
              'optimizer_state_dict':optimizer.state_dict(),
              'train_loss':train_loss,
              'test_accuracy':test_accuracy,
              }

In [33]:
torch.save(checkpoint, '/content/mnist_model_checkpoint.pth')

# 과제2)
* CIFAR-10데이터 학습. 5회반복측정한 평균 test_accuracy

In [34]:
# torchvision에서 데이터 세트 읽기
train_dataset = tv.datasets.CIFAR10(".", download=True, train=True, transform = tv.transforms.ToTensor())
test_dataset = tv.datasets.CIFAR10(".", download=True, train=False, transform = tv.transforms.ToTensor())
# batch = 128개씩
min_batch = 128
# dataloader로 데이터셋 구조화
train_loader = DataLoader(dataset = train_dataset, batch_size=min_batch, shuffle=True)
test_loader = DataLoader(dataset = test_dataset, batch_size=min_batch, shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 79814756.25it/s]


Extracting ./cifar-10-python.tar.gz to .
Files already downloaded and verified


In [35]:
class cifar_classification(nn.Module):
  def __init__(self, hidden_variables=[256,128,64,10], input_output_dim = ((32*32*3), 10)):
    super().__init__()
    self.input_variable_dim = input_output_dim[0]
    self.output_variable_dim = input_output_dim[1]
    self.list_hidden_variable = hidden_variables
    self.layer = nn.Sequential()
    variable_dim = self.input_variable_dim

    for i, hidden_variable in enumerate(self.list_hidden_variable):
      self.layer.add_module('layer_'+str(i), nn.Linear(variable_dim, hidden_variable))
      self.layer.add_module('activation_'+str(i), nn.ReLU())
      variable_dim = hidden_variable
    self.layer.add_module('final_layer', nn.Linear(variable_dim, self.output_variable_dim))

  def forward(self, x):
    x = x.view(-1, (32*32*3))
    y_hat = self.layer(x)

    return y_hat

In [36]:
def make_train_step(model, loss_fn, optimizer):
  def train_step_fn(x,y):

    model.train()
    y_hat = model(x)
    loss = loss_fn(y_hat, y)
    loss.backward() # loss.backward : 오차(error)를 역전파
    optimizer.step()
    optimizer.zero_grad()

    return loss.item()
  return train_step_fn

In [37]:
lr = 0.1
epochs = 20
model = cifar_classification().to(device)
optimizer = optim.SGD(model.parameters(), lr=lr)
loss_fn = nn.CrossEntropyLoss()
train_step = make_train_step(model, loss_fn, optimizer)

In [43]:
@timer
def cifar_train_model(epochs=epochs):

  train_losses = []
  for epoch in range(epochs):
    mini_batch_losses =[]
    for x_minibatch, y_minibatch in train_loader:
      x_minibatch = x_minibatch.to(device)
      y_minibatch = y_minibatch.to(device)

      mini_batch_loss = train_step(x_minibatch, y_minibatch)
      mini_batch_losses.append(mini_batch_loss)

    # evaluate train loss
    loss = np.mean(mini_batch_losses)
    print("train loss at {} epoch: {}".format(epoch+1, loss))
    train_losses.append(loss)

  # evaluate validation loss
  with torch.no_grad():

    test_accuracy = 0
    test_result = 0
    test_cnt = 0

    for x_minibatch_test, y_minibatch_test in test_loader:
      x_minibatch_test = x_minibatch_test.to(device)
      y_minibatch_test = y_minibatch_test.to(device)

      y_hat_val = model(x_minibatch_test)
      y_true_cnt = torch.sum(torch.argmax(y_hat_val, axis=1)==y_minibatch_test)
      y_cnt = len(y_hat_val)

      test_result += y_true_cnt
      test_cnt += y_cnt

    test_accuracy = 100 * test_result / test_cnt

    print("test accuracy : {}%".format(test_accuracy))

  return train_losses, test_accuracy


In [44]:
cifar_checkpoint = []
for i in range(5):
  train_loss, test_accuracy = mnist_train_model(epochs=epochs)

  checkpoint = {'epochs':epochs,
              'model_state_dict':model.state_dict(),
              'optimizer_state_dict':optimizer.state_dict(),
              'train_loss':train_loss,
              'test_accuracy':test_accuracy,
              }
  cifar_checkpoint.append(checkpoint)
  torch.save(checkpoint, '/content/cifar_model_checkpoint_{}.pth'.format(i))

train loss at 5 epoch: 1.7109298334097314
test accuracy : 36.93000030517578%
excution time mnist_train_model: 73.81090092658997 seconds
train loss at 5 epoch: 1.5256964943902878
test accuracy : 44.400001525878906%
excution time mnist_train_model: 73.27522873878479 seconds
train loss at 5 epoch: 1.408014210288787
test accuracy : 46.630001068115234%
excution time mnist_train_model: 73.31957197189331 seconds
train loss at 5 epoch: 1.3080901706310184
test accuracy : 49.41999816894531%
excution time mnist_train_model: 75.6358699798584 seconds
train loss at 5 epoch: 1.2214825625919625
test accuracy : 49.84000015258789%
excution time mnist_train_model: 72.44580245018005 seconds


In [1]:
(36.93+44.4+46.6+49.41+49.84)/5

45.436