In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import torchvision.transforms as transforms
import torchvision.datasets as vdatasets
import torchvision.utils as vutils
from tensorboardX import SummaryWriter
import pickle,os,shutil
torch.manual_seed(1)

<torch._C.Generator at 0x108d3de70>

# What is weight initialization?
- 딥러닝 모델은 weight 초기화가 중요하다.
- weight 초기화 방식에 따라 학습 속도와 saddle point에 빠질 가능성이 달라지며, 이는 모델 성능을 좌우한다.

<img src="./images/weight_initialization.png" width="300">


In [2]:
# Port used for TensorBoard at the end of the notebook.
port = '6006'

# Reset TensorBoard data files left over from earlier runs.
# ignore_errors=True tolerates a missing 'runs/' directory without a bare
# `except:` (which would also swallow KeyboardInterrupt / SystemExit).
shutil.rmtree('runs/', ignore_errors=True)

# 0. init operation

In [3]:
class NN(nn.Module):
    """Fully connected network: two ReLU hidden layers followed by a linear output layer.

    Args:
        input_size: number of features per input sample.
        hidden_size: width of both hidden layers.
        output_size: number of values produced per sample.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, output_size)

    def init_weight(self, init_op=None):
        """Apply ``init_op`` to every parameter whose name contains 'weight'.

        Args:
            init_op: callable that takes a tensor and returns the initialised
                tensor (e.g. ``nn.init.xavier_normal_``). When falsy, nothing
                is changed.

        Prints ``init_op.__name__`` once per tensor that is re-initialised.
        Parameters whose name does not contain 'weight' (the biases) are left as is.
        """
        if not init_op:
            return

        weight_tensors = (
            tensor for label, tensor in self.named_parameters() if 'weight' in label
        )
        for tensor in weight_tensors:
            print(init_op.__name__)
            # Assign the returned tensor back so that initialisers which return
            # a new tensor (rather than working in place) are honoured as well.
            tensor.data = init_op(tensor.data)

    def forward(self, inputs):
        """Return the output of the last linear layer for ``inputs`` (no final activation)."""
        hidden = F.relu(self.linear1(inputs))
        hidden = F.relu(self.linear2(hidden))
        return self.linear3(hidden)

In [4]:
# Layer sizes passed to NN: input features, hidden width, number of outputs.
INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE = 784, 1024, 10

# Build the network, then re-draw its weight tensors with Xavier-normal
# initialisation (init_weight prints the initialiser name once per tensor).
model = NN(INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE)
model.init_weight(nn.init.xavier_normal_)

xavier_normal_
xavier_normal_
xavier_normal_


In [5]:
# Scratch tensor of zeros with shape (10, 2); the following cells overwrite it
# with different nn.init initialisers.
param = torch.zeros((10, 2))

param

tensor([[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]])

In [6]:
# Fill `param` in place with samples from the uniform distribution U(0, 1).
nn.init.uniform_(param, a=0, b=1)

tensor([[0.4688, 0.0987],
        [0.3022, 0.7655],
        [0.5121, 0.7124],
        [0.8003, 0.5404],
        [0.5419, 0.2992],
        [0.8381, 0.7065],
        [0.9928, 0.2173],
        [0.4234, 0.9057],
        [0.5621, 0.4820],
        [0.3548, 0.6176]])

In [7]:
# Fill `param` in place with samples from the normal distribution N(mean=0, std=1).
nn.init.normal_(param, mean=0, std=1)

tensor([[ 0.0564, -0.2130],
        [ 1.5743, -1.1243],
        [-0.7811, -0.0694],
        [-0.2208, -2.2898],
        [ 1.7076,  1.4199],
        [-0.4382, -0.1161],
        [-0.2865, -0.1408],
        [ 1.0070, -0.5401],
        [-0.7316, -0.9131],
        [-0.5705,  0.5016]])

In [8]:
# Set every element of `param` to the constant 1.
nn.init.constant_(param, val=1)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.]])

In [9]:
# Xavier (Glorot) uniform: samples from U(-a, a) with
# a = gain * sqrt(6 / (fan_in + fan_out)); for this (10, 2) tensor a is about 0.707.
nn.init.xavier_uniform_(param, gain=1.0)

tensor([[ 0.1851,  0.3072],
        [-0.1578,  0.7031],
        [ 0.4530,  0.4093],
        [ 0.2209, -0.5772],
        [ 0.3094,  0.1635],
        [ 0.3197,  0.0470],
        [-0.6948,  0.2600],
        [-0.2312,  0.5589],
        [-0.4113,  0.6521],
        [-0.1810,  0.3387]])

In [10]:
# Xavier (Glorot) normal: samples from N(0, std^2) with
# std = gain * sqrt(2 / (fan_in + fan_out)); for this (10, 2) tensor std is about 0.408.
nn.init.xavier_normal_(param, gain=1.0)

tensor([[ 0.1985,  0.5769],
        [-0.3624, -0.7879],
        [-0.6563,  0.5113],
        [ 0.0036,  0.3797],
        [-0.2453,  0.3223],
        [ 0.3755,  0.6599],
        [ 0.7216, -0.1670],
        [-0.4652,  0.1962],
        [ 0.3644,  0.0838],
        [ 0.1378, -0.0207]])

# 1. weight init 효과 분석

In [11]:
# Initialisers to compare. For the None entry the training cell skips
# init_weight, so the model keeps whatever nn.Linear set up at construction.
INIT_OPS = [None, nn.init.xavier_normal_]

# Optimisation settings: passes over the data, SGD learning rate, mini-batch size.
EPOCH = 3
LR = 0.01
BATCH_SIZE = 64

# Layer sizes passed to NN: input features, hidden width, number of outputs.
INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE = 784, 1024, 10

# MNIST training split with images converted to tensors.
# download=False: the files must already exist under ../data/MNIST.
train_dataset = vdatasets.MNIST(
    root='../data/MNIST',
    train=True,
    transform=transforms.ToTensor(),
    download=False,
)

# Shuffled mini-batches, loaded by two worker processes.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)

In [12]:
# Reset TensorBoard data files so curves from earlier runs do not mix with
# this comparison. ignore_errors=True tolerates a missing 'runs/' directory
# without a bare `except:` (which would also swallow KeyboardInterrupt).
shutil.rmtree('runs/', ignore_errors=True)

# Number of mini-batches per epoch; used to turn (epoch, batch) into a global step.
steps_per_epoch = len(train_loader)

for INIT_OP in INIT_OPS:
    op_name = INIT_OP.__name__ if INIT_OP else "None"
    # One writer per initialiser, tagged with the initialiser name.
    writer = SummaryWriter(comment="-" + op_name)
    try:
        # Model declaration: a fresh network for every initialiser so the runs
        # do not share trained weights.
        model = NN(INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE)
        if INIT_OP is not None:
            model.init_weight(INIT_OP)

        loss_function = nn.CrossEntropyLoss()
        optimizer = optim.SGD(model.parameters(), lr=LR)

        # Training
        print(op_name + " training start! ")
        for epoch in range(EPOCH):
            for i, (inputs, targets) in enumerate(train_loader):
                model.zero_grad()
                # Flatten each image to a vector of INPUT_SIZE features.
                outputs = model(inputs.view(-1, INPUT_SIZE))
                loss = loss_function(outputs, targets)
                loss.backward()
                optimizer.step()

                # 1-based step counter that keeps increasing across epochs.
                global_step = (i + 1) + epoch * steps_per_epoch
                writer.add_scalars('data/weight_init/', {op_name: loss.item()}, global_step)
        print("done")
    finally:
        # Close the writer of *this* run. The original closed only the last
        # writer after the loop, leaving earlier ones open and possibly unflushed.
        writer.close()

None training start! 
done
xavier_normal_
xavier_normal_
xavier_normal_
xavier_normal_ training start! 
done


In [13]:
# Display the port configured earlier in the notebook.
port

'6006'

In [None]:
# Launch TensorBoard on the 'runs' log directory. {port} is interpolated by
# IPython from the notebook's `port` variable, so the value is defined in one place.
!tensorboard --logdir runs --port {port}