#드라이브 마운트

In [0]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


#라이브러리 추가하기

In [0]:
import os
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

from torchvision import transforms, datasets

#트레이닝 파라미터 설정하기

In [0]:
# Optimisation hyper-parameters.
num_epoch = 10
batch_size = 64
lr = 1e-3

# Running training statistics are printed every `num_freq` batches.
num_freq = 100

# Locations (relative to the current working directory) for the dataset,
# the TensorBoard logs and the model checkpoints.
data_dir = './drive/My Drive/YouTube/001-pytorch-mnist/mnist/data'
log_dir = './drive/My Drive/YouTube/001-pytorch-mnist/mnist/log'
ckpt_dir = './drive/My Drive/YouTube/001-pytorch-mnist/checkpoint'

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print(device)

cuda


#네트워크 구축하기

In [0]:
class Net(nn.Module):
    """Small convolutional classifier: two conv/pool/ReLU stages, then two linear layers.

    The first linear layer expects 320 features per sample (20 channels x 4 x 4),
    which is what e.g. a 1 x 28 x 28 input yields after the two unpadded 5x5
    convolutions and the two 2x2 max-poolings. The forward pass returns 10 raw
    scores per sample (no softmax is applied here).
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1 -> 10 channels.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5, stride=1, padding=0, bias=True)
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.relu1 = nn.ReLU()

        # Stage 2: 10 -> 20 channels, with channel-wise dropout on the feature maps.
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5, stride=1, padding=0, bias=True)
        self.drop2 = nn.Dropout2d(p=0.5)
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        self.relu2 = nn.ReLU()

        # Classifier head.
        self.fc1 = nn.Linear(in_features=320, out_features=50, bias=True)
        self.relu1_fc1 = nn.ReLU()
        # The fc1 output is 2-D (batch, features), so element-wise Dropout is the
        # right module here; Dropout2d on 2-D input is deprecated. Dropout has no
        # parameters, so existing checkpoints remain loadable.
        self.drop1_fc1 = nn.Dropout(p=0.5)

        self.fc2 = nn.Linear(in_features=50, out_features=10, bias=True)

    def forward(self, x):
        """Return the 10 raw class scores for each sample in the batch `x`."""
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.relu1(x)

        x = self.conv2(x)
        x = self.drop2(x)
        x = self.pool2(x)
        x = self.relu2(x)

        # Flatten per sample while keeping the batch axis fixed, so an input of
        # the wrong spatial size fails at fc1 instead of being silently folded
        # into extra batch rows.
        x = x.flatten(start_dim=1)

        x = self.fc1(x)
        x = self.relu1_fc1(x)
        x = self.drop1_fc1(x)

        x = self.fc2(x)

        return x

#네트워크를 저장하거나 불러오는 함수 작성

In [0]:
def save(ckpt_dir, net, optim, epoch):
    """Write the network and optimiser state to `<ckpt_dir>/model_epoch<epoch>.pth`.

    Args:
        ckpt_dir: Directory to store checkpoints in; created if it does not exist.
            May be relative or absolute.
        net: Module whose `state_dict()` is stored under the key 'net'.
        optim: Optimiser whose `state_dict()` is stored under the key 'optim'.
        epoch: Integer epoch number used in the checkpoint file name.
    """
    os.makedirs(ckpt_dir, exist_ok=True)

    # Join the path instead of prefixing './', which would turn an absolute
    # ckpt_dir into a relative one and write outside the directory just created.
    ckpt_path = os.path.join(ckpt_dir, 'model_epoch%d.pth' % epoch)

    torch.save({'net': net.state_dict(), 'optim': optim.state_dict()}, ckpt_path)

def load(ckpt_dir, net, optim):
    """Restore the most recent checkpoint found in `ckpt_dir`, if there is one.

    Only files named `model_epoch<N>.pth` (N a decimal integer) are considered;
    any other file in the directory is ignored. The checkpoint with the largest
    N is loaded into `net` and `optim` in place.

    Args:
        ckpt_dir: Directory that holds the checkpoint files (relative or absolute).
        net: Module to receive the stored 'net' state dict.
        optim: Optimiser to receive the stored 'optim' state dict.

    Returns:
        Tuple `(net, optim, epoch)`, where `epoch` is the epoch number of the
        loaded checkpoint, or 0 when the directory is missing or contains no
        checkpoint files (in which case `net` and `optim` are left untouched).
    """
    prefix, suffix = 'model_epoch', '.pth'

    if not os.path.exists(ckpt_dir):
        return net, optim, 0

    # Collect (epoch, file name) pairs for well-formed checkpoint names only,
    # so stray files cannot crash the numeric sort or be picked as "latest".
    candidates = []
    for fname in os.listdir(ckpt_dir):
        if fname.startswith(prefix) and fname.endswith(suffix):
            stem = fname[len(prefix):-len(suffix)]
            if stem.isdecimal():
                candidates.append((int(stem), fname))

    if not candidates:
        return net, optim, 0

    epoch, latest = max(candidates)

    # Map stored tensors onto the device the network currently lives on, so a
    # checkpoint written on a GPU can still be restored on a CPU-only machine.
    first_param = next(net.parameters(), None)
    map_location = first_param.device if first_param is not None else None

    dict_model = torch.load(os.path.join(ckpt_dir, latest), map_location=map_location)

    net.load_state_dict(dict_model['net'])
    optim.load_state_dict(dict_model['optim'])

    return net, optim, epoch

#학습 데이터 불러오기

In [0]:
# Convert each image to a tensor, then normalise it with mean 0.5 and std 0.5.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=(0.5,), std=(0.5,))])

# MNIST training split, downloaded into data_dir if it is not already there.
dataset = datasets.MNIST(download=True, root=data_dir, train=True, transform=transform)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)

num_data = len(loader.dataset)
# Ask the loader for its batch count: round(num_data / batch_size) undercounts
# whenever the last, partial batch is smaller than half of batch_size.
num_batch = len(loader)

#네트워크 생성하기

In [0]:
# Build the classifier, move its weights to the selected device, and expose
# its parameters for the optimiser.
net = Net()
net = net.to(device)
params = net.parameters()

#손실함수 등을 설정하기

In [0]:
# Cross-entropy loss, applied to the raw network outputs.
fn_loss = nn.CrossEntropyLoss().to(device)


def fn_pred(output):
    """Turn raw network outputs into per-class probabilities (softmax over dim 1)."""
    return torch.softmax(output, dim=1)


def fn_acc(pred, label):
    """Return the fraction of rows of `pred` whose highest-scoring index equals `label`."""
    _, top_idx = pred.max(dim=1)
    hits = (top_idx == label).type(torch.float)
    return hits.mean()


# Adam over the network parameters, with the configured learning rate.
optim = torch.optim.Adam(params, lr=lr)

# TensorBoard writer for the per-epoch training curves.
writer = SummaryWriter(log_dir=log_dir)

#Load the latest checkpoint (if one exists) to resume training

In [0]:
# Default to a fresh start, then pick up the newest checkpoint if one exists;
# st_epoch becomes the last epoch that was already completed.
st_epoch = 0
net, optim, st_epoch = load(ckpt_dir, net, optim)

#트레이닝 시작하기

In [0]:
# Train from the epoch after the last completed one up to num_epoch.
# The writer is closed in `finally` so buffered logs are flushed even when
# training is interrupted or raises.
try:
    for epoch in range(st_epoch + 1, num_epoch + 1):
        net.train()

        # Per-batch loss / accuracy of the current epoch.
        loss_arr = []
        acc_arr = []

        for batch, (inputs, label) in enumerate(loader, 1):
            # Forward pass (the batch is named `inputs` to avoid shadowing the builtin).
            inputs = inputs.to(device)
            label = label.to(device)

            output = net(inputs)
            pred = fn_pred(output)

            # Backward pass and parameter update.
            optim.zero_grad()

            loss = fn_loss(output, label)
            loss.backward()

            acc = fn_acc(pred, label)

            optim.step()

            # Record this batch's loss and accuracy.
            loss_arr.append(loss.item())
            acc_arr.append(acc.item())

            # Print the running means over the epoch so far.
            if batch % num_freq == 0:
                print('TRAIN: EPOCH %d/%d | BATCH %04d/%04d | LOSS: %.4f | ACC: %.4f' %
                      (epoch, num_epoch, batch, num_batch, np.mean(loss_arr), np.mean(acc_arr)))

        # Log the epoch means to TensorBoard.
        writer.add_scalar('loss', np.mean(loss_arr), epoch)
        writer.add_scalar('acc', np.mean(acc_arr), epoch)

        # Checkpoint the network and optimiser after every epoch.
        save(ckpt_dir=ckpt_dir, net=net, optim=optim, epoch=epoch)
finally:
    writer.close()

TRAIN: EPOCH 1/10 | BATCH 0100/0938 | LOSS: 0.1373 | ACC: 0.9609
TRAIN: EPOCH 1/10 | BATCH 0200/0938 | LOSS: 0.1364 | ACC: 0.9607
TRAIN: EPOCH 1/10 | BATCH 0300/0938 | LOSS: 0.1345 | ACC: 0.9604
TRAIN: EPOCH 1/10 | BATCH 0400/0938 | LOSS: 0.1379 | ACC: 0.9593
TRAIN: EPOCH 1/10 | BATCH 0500/0938 | LOSS: 0.1387 | ACC: 0.9588
TRAIN: EPOCH 1/10 | BATCH 0600/0938 | LOSS: 0.1366 | ACC: 0.9592
TRAIN: EPOCH 1/10 | BATCH 0700/0938 | LOSS: 0.1375 | ACC: 0.9594
TRAIN: EPOCH 1/10 | BATCH 0800/0938 | LOSS: 0.1382 | ACC: 0.9592
TRAIN: EPOCH 1/10 | BATCH 0900/0938 | LOSS: 0.1404 | ACC: 0.9591
TRAIN: EPOCH 2/10 | BATCH 0100/0938 | LOSS: 0.1295 | ACC: 0.9575
TRAIN: EPOCH 2/10 | BATCH 0200/0938 | LOSS: 0.1322 | ACC: 0.9599
TRAIN: EPOCH 2/10 | BATCH 0300/0938 | LOSS: 0.1285 | ACC: 0.9605
TRAIN: EPOCH 2/10 | BATCH 0400/0938 | LOSS: 0.1280 | ACC: 0.9611
TRAIN: EPOCH 2/10 | BATCH 0500/0938 | LOSS: 0.1299 | ACC: 0.9603
TRAIN: EPOCH 2/10 | BATCH 0600/0938 | LOSS: 0.1309 | ACC: 0.9605
TRAIN: EPOCH 2/10 | BATCH