In [4]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import torchvision.transforms as transforms
import torchvision.datasets as vdatasets
import torchvision.utils as vutils
from tensorboardX import SummaryWriter
import pickle,os,shutil
torch.manual_seed(1)

<torch._C.Generator at 0x106761b50>

# What is learning rate decay?


# 0. 텐서보드

In [5]:
port = '6006'  # TensorBoard port

# Start from an empty log directory so curves from earlier sessions do not mix
# with this one. ignore_errors=True keeps the cleanup best-effort (a missing
# directory on the first run is fine) without the bare `except:` that would
# also swallow KeyboardInterrupt / SystemExit.
shutil.rmtree('runs/', ignore_errors=True)

# 1. 데이터

In [6]:
BATCH_SIZE = 64

# Arguments common to both MNIST splits: same storage folder, images converted
# to tensors, files downloaded when they are not already on disk.
_mnist_kwargs = dict(root='../data/MNIST/',
                     transform=transforms.ToTensor(),
                     download=True)

train_dataset = vdatasets.MNIST(train=True, **_mnist_kwargs)

train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
    drop_last=True,  # keeps the moving average from spiking
)

test_dataset = vdatasets.MNIST(train=False, **_mnist_kwargs)

test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)

# 2. model

In [7]:
class NN(nn.Module):
    """Fully connected classifier with one batch-normalized hidden layer.

    Pipeline: Linear(input_size -> hidden_size) -> BatchNorm1d -> ReLU ->
    Linear(hidden_size -> output_size). The result of the last linear layer
    is returned as-is, with no further activation applied.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the two linear layers and the hidden-layer batch norm."""
        super(NN, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, output_size)

        # In: (batch size, features) => Out: (batch size, features)
        self.bn1 = nn.BatchNorm1d(hidden_size)

    def forward(self, inputs):
        """Run `inputs` through linear1 -> bn1 -> ReLU -> linear2."""
        hidden = self.linear1(inputs)
        hidden = self.bn1(hidden)
        hidden = F.relu(hidden)
        return self.linear2(hidden)

# 3. Learning rate scheduling
### 3.1 StepLR

In [8]:
# Hyperparameters for the StepLR experiment.
EPOCH = 4
LR = 0.1

# 784 input features, 512 hidden units, 10 output scores.
model = NN(input_size=784, hidden_size=512, output_size=10)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=LR)

In [9]:
# Shared TensorBoard writer used by the training loops below.
writer = SummaryWriter(comment='-batch-norm')

In [10]:
%%time
scheduler= optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)
for epoch in range(EPOCH):
    losses=[]
    for i, (inputs, targets) in enumerate(train_loader):
        model.zero_grad()
        pred= model(inputs.view(len(inputs),-1))
        loss= loss_function(pred, targets)
        loss.backward()
        optimizer.step()
        
        losses.append(loss.data.item())
        if i % 100==0:
            avg_loss= np.mean(losses)
            print("[%d/%d] [%03d/%d] mean_loss : %.3f" % (epoch,EPOCH,i,len(train_loader),avg_loss))
            writer.add_scalars('data/step/',{'steplr': avg_loss}, (i+1)+(epoch*len(train_loader)))
    
    scheduler.step()
    print(round(scheduler.get_lr()[0],6))

[0/4] [000/937] mean_loss : 2.244
[0/4] [100/937] mean_loss : 0.628
[0/4] [200/937] mean_loss : 0.474
[0/4] [300/937] mean_loss : 0.409
[0/4] [400/937] mean_loss : 0.367
[0/4] [500/937] mean_loss : 0.340
[0/4] [600/937] mean_loss : 0.318
[0/4] [700/937] mean_loss : 0.300
[0/4] [800/937] mean_loss : 0.284
[0/4] [900/937] mean_loss : 0.270
0.1
[1/4] [000/937] mean_loss : 0.110
[1/4] [100/937] mean_loss : 0.143
[1/4] [200/937] mean_loss : 0.141
[1/4] [300/937] mean_loss : 0.140
[1/4] [400/937] mean_loss : 0.137
[1/4] [500/937] mean_loss : 0.135
[1/4] [600/937] mean_loss : 0.132
[1/4] [700/937] mean_loss : 0.131
[1/4] [800/937] mean_loss : 0.129
[1/4] [900/937] mean_loss : 0.127
0.01
[2/4] [000/937] mean_loss : 0.025
[2/4] [100/937] mean_loss : 0.102
[2/4] [200/937] mean_loss : 0.096
[2/4] [300/937] mean_loss : 0.093
[2/4] [400/937] mean_loss : 0.091
[2/4] [500/937] mean_loss : 0.091
[2/4] [600/937] mean_loss : 0.089
[2/4] [700/937] mean_loss : 0.088
[2/4] [800/937] mean_loss : 0.088
[2/4]

### 3.2 MultiStepLR


In [11]:
# Fresh model and optimizer for the MultiStepLR experiment.
LR = 0.1

# 784 input features, 512 hidden units, 10 output scores.
model = NN(input_size=784, hidden_size=512, output_size=10)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=LR)

In [13]:
# Train for EPOCH epochs with MultiStepLR: milestones=[2], gamma=0.1 multiplies
# the learning rate by 0.1 once the scheduler's epoch counter reaches 2.
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[2], gamma=0.1)
steps_per_epoch = len(train_loader)

# Make sure BatchNorm uses batch statistics even if an earlier cell left the
# model in eval mode.
model.train()

for epoch in range(EPOCH):
    losses = []  # per-batch losses of the current epoch
    for i, (inputs, targets) in enumerate(train_loader):
        optimizer.zero_grad()
        # Flatten each sample to a vector without hard-coding the feature count.
        pred = model(inputs.view(len(inputs), -1))
        loss = loss_function(pred, targets)
        loss.backward()
        optimizer.step()

        losses.append(loss.item())
        if i % 100 == 0:
            avg_loss = np.mean(losses)  # mean over the batches seen so far this epoch
            print("[%d/%d] [%03d/%d] mean_loss : %.3f" % (epoch, EPOCH, i, steps_per_epoch, avg_loss))
            writer.add_scalars('data/step/', {'multistep': avg_loss}, (i + 1) + (epoch * steps_per_epoch))

    # Advance the schedule once per epoch, after that epoch's optimizer updates.
    scheduler.step()
    # Report the rate the optimizer will actually use next. scheduler.get_lr()
    # is deprecated outside of step() and can return a wrongly decayed value
    # at a milestone, so read it from the optimizer instead.
    print(round(optimizer.param_groups[0]['lr'], 6))

[0/4] [000/937] mean_loss : 0.049
[0/4] [100/937] mean_loss : 0.078
[0/4] [200/937] mean_loss : 0.083
[0/4] [300/937] mean_loss : 0.087
[0/4] [400/937] mean_loss : 0.087
[0/4] [500/937] mean_loss : 0.087
[0/4] [600/937] mean_loss : 0.087
[0/4] [700/937] mean_loss : 0.086
[0/4] [800/937] mean_loss : 0.084
[0/4] [900/937] mean_loss : 0.084
0.1
[1/4] [000/937] mean_loss : 0.078
[1/4] [100/937] mean_loss : 0.071
[1/4] [200/937] mean_loss : 0.067
[1/4] [300/937] mean_loss : 0.067
[1/4] [400/937] mean_loss : 0.067
[1/4] [500/937] mean_loss : 0.067
[1/4] [600/937] mean_loss : 0.067
[1/4] [700/937] mean_loss : 0.066
[1/4] [800/937] mean_loss : 0.066
[1/4] [900/937] mean_loss : 0.066
0.1
[2/4] [000/937] mean_loss : 0.022
[2/4] [100/937] mean_loss : 0.047
[2/4] [200/937] mean_loss : 0.051
[2/4] [300/937] mean_loss : 0.052
[2/4] [400/937] mean_loss : 0.051
[2/4] [500/937] mean_loss : 0.052
[2/4] [600/937] mean_loss : 0.053
[2/4] [700/937] mean_loss : 0.054
[2/4] [800/937] mean_loss : 0.053
[2/4] 

In [None]:
!tensorboard --logdir runs --port 6006