In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import torchvision.transforms as transforms
import torchvision.datasets as vdatasets
import torchvision.utils as vutils
from tensorboardX import SummaryWriter
torch.manual_seed(1)

<torch._C.Generator at 0x112a0f170>

# What is Dropout?
- 딥러닝에서 사용하는 Regularization 기법 중 하나임.
- 학습을 할 때 forward pass시 랜덤하게 일정 확률 만큼의 뉴런 출력값을 0으로 만듦.
- 네트워크가 제한된 representation을 가지고도 작동할 수 있게함.
- Dropout을 사용하는 것은 여러 sub network들을 ensemble하는 것과 같은 효과를 낸다고 볼 수 있음.

<img src = './images/dropout.png' width= 300>


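- 학습 시에는 일부 뉴런의 출력값을 랜덤하게 0으로 만들기 때문에, 테스트 시에는 이 randomness를 평균해 주기 위해 출력값에 유지 확률(1 − drop 확률)을 곱해 준다. (PyTorch의 `nn.Dropout`은 대신 학습 시 출력에 $\frac{1}{1-p}$를 곱하는 inverted dropout을 사용하므로, 테스트 시에는 별도의 스케일링이 필요 없다.)  
예) drop 확률 $p=0.5$ 이고 $a = w_{1}x+w_{2}y$ 일 때, 학습 시 출력의 기댓값은  
$E[a] = \frac{1}{4}(w_{1}x+w_{2}y) + \frac{1}{4}(w_{1}x+0 \cdot y)+\frac{1}{4}(0 \cdot x+w_{2}y)+\frac{1}{4}(0 \cdot x+0 \cdot y)$  
$=\frac{1}{2}(w_{1}x+w_{2}y)$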
- 네트워크의 일부분만 학습을 해 학습속도가 빠르다.

# 1. MNIST데이터

In [2]:
BATCH_SIZE = 64

# Training split. download=True makes the notebook runnable from a clean
# checkout: torchvision fetches the files only when they are not already
# present under `root`. (The original used download=False here, so a fresh
# "Restart & Run All" failed before the test split ever triggered a download.)
train_dataset = vdatasets.MNIST(root='../data/MNIST/',
                                train=True,
                                transform=transforms.ToTensor(),
                                download=True)

# Shuffle the training batches every epoch.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=BATCH_SIZE,
                                           shuffle=True,
                                           num_workers=2)

# Held-out test split, read from the same root directory.
test_dataset = vdatasets.MNIST(root='../data/MNIST/',
                               train=False,
                               transform=transforms.ToTensor(),
                               download=True)

# No shuffling for evaluation, so batches come in a fixed order.
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=BATCH_SIZE,
                                          shuffle=False,
                                          num_workers=2)

# 2. 모델

Training vs Evaluation  

Call `model.train()` before training the model and `model.eval()` before testing it. These calls switch layers such as dropout and batch normalization between their training-time and test-time behavior.

In [3]:
class NN(nn.Module):
    """Fully connected classifier: two hidden ReLU layers, each followed by dropout.

    Args:
        input_size: number of features in each flattened input sample.
        hidden_size: width of both hidden layers.
        output_size: number of output scores produced per sample.
        dropout_p: probability handed to ``nn.Dropout`` (default 0.5).
    """

    def __init__(self, input_size, hidden_size, output_size, dropout_p=0.5):
        super().__init__()
        # Layers are created in this fixed order so that seeded initialisation
        # and the module repr stay the same.
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, output_size)

        # A single dropout module is shared by both hidden layers.
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, inputs):
        """Return the raw (un-normalised) output scores for a batch of flat inputs."""
        hidden = inputs
        for layer in (self.linear1, self.linear2):
            # linear -> ReLU -> dropout, applied once per hidden layer
            hidden = self.dropout(F.relu(layer(hidden)))
        return self.linear3(hidden)

In [4]:
# Length of one flattened image: dimension 1 of the sample tensor, squared
# (this assumes square images; the repr printed by this cell shows 784 = 28 * 28).
INPUT_SIZE = train_dataset[0][0].size(1) ** 2
HIDDEN_SIZE = 512
OUTPUT_SIZE = 10

model = NN(INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE)

# Training mode: dropout is active.
model.train()

# Evaluation mode: dropout is disabled. The call returns the module, so as the
# last expression of the cell it also displays the network's repr.
model.eval()


NN(
  (linear1): Linear(in_features=784, out_features=512, bias=True)
  (linear2): Linear(in_features=512, out_features=512, bias=True)
  (linear3): Linear(in_features=512, out_features=10, bias=True)
  (dropout): Dropout(p=0.5)
)

# 3. 학습

In [5]:
# Hyper-parameters for the training run.
EPOCH = 15        # number of full passes over train_loader
LR = 0.1          # SGD learning rate
INPUT_SIZE = train_dataset[0][0].size()[1] ** 2  # flattened image length (assumes square images)
HIDDEN_SIZE = 512
OUTPUT_SIZE = 10
BATCH_SIZE = 64   # NOTE: the DataLoaders were already built with the earlier BATCH_SIZE;
                  # changing this value here does not affect them.
NUM_LAYERS = 1    # not referenced by any cell visible here
DROPOUT = 0.5     # dropout probability handed to the model below

# Build a fresh, untrained model. DROPOUT is now passed explicitly, so editing
# the constant actually changes the network (previously it was silently ignored
# and NN always fell back to its default dropout_p).
model = NN(INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE, dropout_p=DROPOUT)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=LR)

In [6]:
%%time
# Switch to training mode so that dropout is active while fitting.
model.train()
for epoch in range(EPOCH):
    recent_losses = []  # batch losses collected since the last progress line
    for step, (inputs, targets) in enumerate(train_loader):
        # Clear gradients left over from the previous batch.
        model.zero_grad()

        # Flatten every sample to one vector before the forward pass.
        logits = model(inputs.view(inputs.size(0), -1))
        loss = loss_function(logits, targets)
        loss.backward()
        optimizer.step()

        recent_losses.append(loss.item())
        if step % 500 != 0:
            continue

        # Report the mean of the losses gathered since the previous report,
        # then start a new window.
        print("[%d/%d] [%03d/%d] mean_loss : %.3f" % (epoch, EPOCH, step, len(train_loader), np.mean(recent_losses)))
        recent_losses = []

[0/15] [000/938] mean_loss : 2.312
[0/15] [500/938] mean_loss : 0.759
[1/15] [000/938] mean_loss : 0.220
[1/15] [500/938] mean_loss : 0.242
[2/15] [000/938] mean_loss : 0.242
[2/15] [500/938] mean_loss : 0.173
[3/15] [000/938] mean_loss : 0.098
[3/15] [500/938] mean_loss : 0.142
[4/15] [000/938] mean_loss : 0.061
[4/15] [500/938] mean_loss : 0.126
[5/15] [000/938] mean_loss : 0.269
[5/15] [500/938] mean_loss : 0.108
[6/15] [000/938] mean_loss : 0.271
[6/15] [500/938] mean_loss : 0.095
[7/15] [000/938] mean_loss : 0.191
[7/15] [500/938] mean_loss : 0.085
[8/15] [000/938] mean_loss : 0.058
[8/15] [500/938] mean_loss : 0.077
[9/15] [000/938] mean_loss : 0.036
[9/15] [500/938] mean_loss : 0.078
[10/15] [000/938] mean_loss : 0.161
[10/15] [500/938] mean_loss : 0.071
[11/15] [000/938] mean_loss : 0.068
[11/15] [500/938] mean_loss : 0.066
[12/15] [000/938] mean_loss : 0.080
[12/15] [500/938] mean_loss : 0.062
[13/15] [000/938] mean_loss : 0.090
[13/15] [500/938] mean_loss : 0.058
[14/15] [000

# 4. evaluation

In [7]:
def evaluation(data_loader, model):
    """Compute accuracy and mean cross-entropy loss of ``model`` over ``data_loader``.

    The model is switched to evaluation mode (and left in that mode), and the
    forward passes run under ``torch.no_grad()`` so no autograd graph is built.

    Args:
        data_loader: iterable yielding ``(inputs, targets)`` batches; every
            input sample is flattened to one vector before the forward pass.
        model: module mapping a ``(batch, features)`` tensor to per-class scores.

    Returns:
        ``(accuracy, mean_loss)`` as Python floats, both averaged over the
        samples actually yielded by ``data_loader`` (so the result stays
        correct with ``drop_last=True`` or a subset sampler).

    Raises:
        ZeroDivisionError: if ``data_loader`` yields no samples.
    """
    model.eval()  # disable dropout at test time
    # Sum the per-sample losses so batches of different sizes are weighted correctly.
    loss_function = nn.CrossEntropyLoss(reduction='sum')
    num_correct = 0
    total_loss = 0.0
    num_samples = 0
    with torch.no_grad():
        for inputs, targets in data_loader:
            logits = model(inputs.view(len(inputs), -1))
            total_loss += loss_function(logits, targets).item()
            predicted = logits.max(1)[1]  # argmax over the class dimension
            num_correct += (predicted == targets).sum().item()
            num_samples += len(targets)

    return num_correct / num_samples, total_loss / num_samples

In [8]:
# Score the trained model on the training split and on the held-out test split.
train_acc, train_loss = evaluation(train_loader, model)
test_acc, test_loss = evaluation(test_loader, model)

# One report line per split, metrics rounded to three decimals.
print(
    '<train acc : > {} / <train loss> : {}'.format(round(train_acc, 3), round(train_loss, 3)),
    '<test  acc : > {} / <test  loss> : {}'.format(round(test_acc, 3), round(test_loss, 3)),
    sep='\n',
)



<train acc : > 0.994 / <train loss> : 0.019
<test  acc : > 0.983 / <test  loss> : 0.057
