## 파일 설명
| 파일명 | 파일 용도 | 관련 절 | 페이지 |
|:--   |:--      |:--    |:--      |
| batch_norm_gradient_check.py | 배치 정규화를 구현한 신경망의 오차역전파법 방식의 기울기 계산이 정확한지 확인합니다(기울기 확인). |  |  |
| batch_norm_test.py | MNIST 데이터셋 학습에 배치 정규화를 적용해봅니다. | 6.3.2 배치 정규화의 효과 | 212 |
| hyperparameter_optimization.py | 무작위로 추출한 값부터 시작하여 두 하이퍼파라미터(가중치 감소 계수, 학습률)를 최적화해봅니다. | 6.5.3 하이퍼파라미터 최적화 구현하기 | 224 |
| optimizer_compare_mnist.py | SGD, 모멘텀, AdaGrad, Adam의 학습 속도를 비교합니다. | 6.1.8 MNIST 데이터셋으로 본 갱신 방법 비교 | 201 |
| optimizer_compare_naive.py | SGD, 모멘텀, AdaGrad, Adam의 학습 패턴을 비교합니다. | 6.1.7 어느 갱신 방법을 이용할 것인가? | 200 |
| overfit_dropout.py | 일부러 오버피팅을 일으킨 후 드롭아웃(dropout)의 효과를 관찰합니다. | 6.4.3 드롭아웃 | 219 |
| overfit_weight_decay.py | 일부러 오버피팅을 일으킨 후 가중치 감소(weight_decay)의 효과를 관찰합니다. | 6.4.1 오버피팅 | 215 |
| weight_init_activation_histogram.py | 활성화 함수로 시그모이드 함수를 사용하는 5층 신경망에 무작위로 생성한 입력 데이터를 흘리며 각 층의 활성화값 분포를 히스토그램으로 그려봅니다. | 6.2.2 은닉층의 활성화값 분포 | 203 |
| weight_init_compare.py | 가중치 초깃값(std=0.01, He, Xavier)에 따른 학습 속도를 비교합니다. | 6.2.4 MNIST 데이터셋으로 본 가중치 초깃값 비교 | 209 |


# batch_norm_gradient_check.py

In [1]:
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # 부모 디렉터리의 파일을 가져올 수 있도록 설정
import numpy as np
from dataset.mnist import load_mnist
from common.multi_layer_net_extend import MultiLayerNetExtend

# Load MNIST with one-hot encoded labels.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# Batch-norm network whose backprop gradients are compared against
# numerically computed gradients.
network = MultiLayerNetExtend(
    input_size=784,
    hidden_size_list=[100, 100],
    output_size=10,
    use_batchnorm=True,
)

# Only the first training sample is used for the check.
x_batch, t_batch = x_train[:1], t_train[:1]

grad_backprop = network.gradient(x_batch, t_batch)
grad_numerical = network.numerical_gradient(x_batch, t_batch)

# Print the mean absolute difference between both gradients, per parameter.
for key in grad_numerical:
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))

W1:0.0
b1:0.0
gamma1:0.0
beta1:0.0
W2:0.0
b2:0.0
gamma2:0.0
beta2:0.04636255680514623
W3:0.0
b3:1.7990402263745597e-07


In [None]:
"""
W1:0.0
b1:0.0
gamma1:0.0
beta1:0.0
W2:0.0
b2:0.0
gamma2:0.0
beta2:0.04636255680514623
W3:0.0
b3:1.7990402263745597e-07
"""

# batch_norm_test.py

In [2]:
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # 부모 디렉터리의 파일을 가져올 수 있도록 설정
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from common.multi_layer_net_extend import MultiLayerNetExtend
from common.optimizer import SGD, Adam

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

# Keep only the first 1000 training samples.
x_train, t_train = x_train[:1000], t_train[:1000]

# Training settings read by __train() and by the plotting code below.
max_epochs = 20
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.01


def __train(weight_init_std):
    """Train two otherwise identical networks, with and without batch norm.

    Both networks are updated on the same randomly drawn mini-batches taken
    from the module-level ``x_train`` / ``t_train``. Training accuracy on the
    full (reduced) training set is recorded once per epoch until
    ``max_epochs`` values have been collected.

    Args:
        weight_init_std: Forwarded to ``MultiLayerNetExtend`` as
            ``weight_init_std`` for both networks.

    Returns:
        A tuple ``(train_acc_list, bn_train_acc_list)`` holding the per-epoch
        training accuracy of the plain network and of the batch-norm network.
    """
    bn_network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100, 100], output_size=10,
                                     weight_init_std=weight_init_std, use_batchnorm=True)
    network = MultiLayerNetExtend(input_size=784, hidden_size_list=[100, 100, 100, 100, 100], output_size=10,
                                  weight_init_std=weight_init_std)
    optimizer = SGD(lr=learning_rate)

    train_acc_list = []
    bn_train_acc_list = []

    # Integer number of iterations per epoch. With true division this was a
    # float, and `i % iter_per_epoch == 0` only fired on schedule when
    # train_size was an exact multiple of batch_size.
    iter_per_epoch = max(train_size // batch_size, 1)
    epoch_cnt = 0

    # Effectively unbounded loop; it ends via the break below.
    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # Same mini-batch for both networks so the curves are comparable.
        for _network in (bn_network, network):
            grads = _network.gradient(x_batch, t_batch)
            optimizer.update(_network.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            bn_train_acc = bn_network.accuracy(x_train, t_train)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)

            print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) + " - " + str(bn_train_acc))

            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    return train_acc_list, bn_train_acc_list


# Plot training accuracy for every weight-initialization scale ==========
weight_scale_list = np.logspace(0, -4, num=16)
x = np.arange(max_epochs)

for i, w in enumerate(weight_scale_list):
    print("============== " + str(i+1) + "/" + str(len(weight_scale_list)) + " ==============")
    train_acc_list, bn_train_acc_list = __train(w)

    # The subplot layout below (4x4 grid, `i % 4`, `i < 12`) assumes 16 scales.
    plt.subplot(4, 4, i+1)
    plt.title("W:" + str(w))
    if i == len(weight_scale_list) - 1:
        plt.plot(x, bn_train_acc_list, label='Batch Normalization', markevery=2)
        plt.plot(x, train_acc_list, linestyle="--", label='Normal(without BatchNorm)', markevery=2)
        # Only this subplot has labelled lines. Calling legend() on the other
        # subplots just emitted "No handles with labels found to put in legend."
        plt.legend(loc='lower right')
    else:
        plt.plot(x, bn_train_acc_list, markevery=2)
        plt.plot(x, train_acc_list, linestyle="--", markevery=2)

    plt.ylim(0, 1.0)
    # Show y ticks/label only in the first column.
    if i % 4:
        plt.yticks([])
    else:
        plt.ylabel("accuracy")
    # Show x ticks/label only in the bottom row.
    if i < 12:
        plt.xticks([])
    else:
        plt.xlabel("epochs")

plt.show()

epoch:0 | 0.093 - 0.094


  self.mask = (x <= 0)
  weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)
  weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)


epoch:1 | 0.097 - 0.107
epoch:2 | 0.097 - 0.137
epoch:3 | 0.097 - 0.165
epoch:4 | 0.097 - 0.183
epoch:5 | 0.097 - 0.205
epoch:6 | 0.097 - 0.227
epoch:7 | 0.097 - 0.247
epoch:8 | 0.097 - 0.27
epoch:9 | 0.097 - 0.287
epoch:10 | 0.097 - 0.309
epoch:11 | 0.097 - 0.324
epoch:12 | 0.097 - 0.345
epoch:13 | 0.097 - 0.345
epoch:14 | 0.097 - 0.367
epoch:15 | 0.097 - 0.384
epoch:16 | 0.097 - 0.387
epoch:17 | 0.097 - 0.407
epoch:18 | 0.097 - 0.409


No handles with labels found to put in legend.


epoch:19 | 0.097 - 0.421
epoch:0 | 0.094 - 0.105


  self.mask = (x <= 0)
  weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)
  weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)


epoch:1 | 0.097 - 0.088
epoch:2 | 0.097 - 0.095
epoch:3 | 0.097 - 0.118
epoch:4 | 0.097 - 0.142
epoch:5 | 0.097 - 0.151
epoch:6 | 0.097 - 0.192
epoch:7 | 0.097 - 0.21
epoch:8 | 0.097 - 0.228
epoch:9 | 0.097 - 0.251
epoch:10 | 0.097 - 0.271
epoch:11 | 0.097 - 0.3
epoch:12 | 0.097 - 0.322
epoch:13 | 0.097 - 0.348
epoch:14 | 0.097 - 0.361
epoch:15 | 0.097 - 0.392
epoch:16 | 0.097 - 0.401
epoch:17 | 0.097 - 0.422
epoch:18 | 0.097 - 0.442


No handles with labels found to put in legend.


epoch:19 | 0.097 - 0.459
epoch:0 | 0.146 - 0.078
epoch:1 | 0.305 - 0.103
epoch:2 | 0.461 - 0.149
epoch:3 | 0.558 - 0.173
epoch:4 | 0.633 - 0.193
epoch:5 | 0.673 - 0.231
epoch:6 | 0.743 - 0.258
epoch:7 | 0.777 - 0.298
epoch:8 | 0.822 - 0.332
epoch:9 | 0.848 - 0.364
epoch:10 | 0.866 - 0.399
epoch:11 | 0.885 - 0.44
epoch:12 | 0.902 - 0.471
epoch:13 | 0.919 - 0.507
epoch:14 | 0.927 - 0.525
epoch:15 | 0.934 - 0.543
epoch:16 | 0.939 - 0.57
epoch:17 | 0.953 - 0.586
epoch:18 | 0.961 - 0.609


No handles with labels found to put in legend.


epoch:19 | 0.966 - 0.626
epoch:0 | 0.098 - 0.117
epoch:1 | 0.226 - 0.135
epoch:2 | 0.378 - 0.183
epoch:3 | 0.469 - 0.248
epoch:4 | 0.545 - 0.31
epoch:5 | 0.607 - 0.369
epoch:6 | 0.64 - 0.439
epoch:7 | 0.664 - 0.481
epoch:8 | 0.713 - 0.519
epoch:9 | 0.729 - 0.571
epoch:10 | 0.761 - 0.59
epoch:11 | 0.766 - 0.63
epoch:12 | 0.781 - 0.655
epoch:13 | 0.802 - 0.678
epoch:14 | 0.809 - 0.698
epoch:15 | 0.823 - 0.72
epoch:16 | 0.84 - 0.734
epoch:17 | 0.839 - 0.757
epoch:18 | 0.852 - 0.773


No handles with labels found to put in legend.


epoch:19 | 0.858 - 0.78
epoch:0 | 0.115 - 0.102
epoch:1 | 0.126 - 0.143
epoch:2 | 0.13 - 0.266
epoch:3 | 0.143 - 0.411
epoch:4 | 0.159 - 0.5
epoch:5 | 0.174 - 0.579
epoch:6 | 0.183 - 0.628
epoch:7 | 0.186 - 0.681
epoch:8 | 0.196 - 0.702
epoch:9 | 0.201 - 0.736
epoch:10 | 0.206 - 0.766
epoch:11 | 0.211 - 0.789
epoch:12 | 0.215 - 0.809
epoch:13 | 0.221 - 0.826
epoch:14 | 0.222 - 0.835
epoch:15 | 0.237 - 0.844
epoch:16 | 0.236 - 0.861
epoch:17 | 0.238 - 0.872
epoch:18 | 0.25 - 0.874


No handles with labels found to put in legend.


epoch:19 | 0.247 - 0.881
epoch:0 | 0.093 - 0.128
epoch:1 | 0.116 - 0.271
epoch:2 | 0.135 - 0.424
epoch:3 | 0.122 - 0.545
epoch:4 | 0.116 - 0.639
epoch:5 | 0.116 - 0.707
epoch:6 | 0.116 - 0.762
epoch:7 | 0.116 - 0.786
epoch:8 | 0.116 - 0.812
epoch:9 | 0.116 - 0.84
epoch:10 | 0.116 - 0.858
epoch:11 | 0.116 - 0.876
epoch:12 | 0.116 - 0.888
epoch:13 | 0.116 - 0.903
epoch:14 | 0.116 - 0.911
epoch:15 | 0.116 - 0.919
epoch:16 | 0.116 - 0.925
epoch:17 | 0.116 - 0.93
epoch:18 | 0.116 - 0.937


No handles with labels found to put in legend.


epoch:19 | 0.116 - 0.944
epoch:0 | 0.097 - 0.163
epoch:1 | 0.117 - 0.271
epoch:2 | 0.117 - 0.589
epoch:3 | 0.117 - 0.678
epoch:4 | 0.117 - 0.734
epoch:5 | 0.117 - 0.769
epoch:6 | 0.117 - 0.813
epoch:7 | 0.117 - 0.841
epoch:8 | 0.117 - 0.872
epoch:9 | 0.117 - 0.894
epoch:10 | 0.117 - 0.917
epoch:11 | 0.117 - 0.935
epoch:12 | 0.117 - 0.945
epoch:13 | 0.117 - 0.957
epoch:14 | 0.117 - 0.966
epoch:15 | 0.117 - 0.969
epoch:16 | 0.117 - 0.975
epoch:17 | 0.117 - 0.978
epoch:18 | 0.117 - 0.983


No handles with labels found to put in legend.


epoch:19 | 0.117 - 0.985
epoch:0 | 0.1 - 0.136
epoch:1 | 0.117 - 0.4
epoch:2 | 0.116 - 0.66
epoch:3 | 0.116 - 0.753
epoch:4 | 0.116 - 0.796
epoch:5 | 0.116 - 0.843
epoch:6 | 0.116 - 0.863
epoch:7 | 0.116 - 0.881
epoch:8 | 0.116 - 0.907
epoch:9 | 0.116 - 0.927
epoch:10 | 0.116 - 0.953
epoch:11 | 0.116 - 0.97
epoch:12 | 0.116 - 0.981
epoch:13 | 0.116 - 0.992
epoch:14 | 0.116 - 0.992
epoch:15 | 0.116 - 0.994
epoch:16 | 0.116 - 0.996
epoch:17 | 0.116 - 0.996
epoch:18 | 0.116 - 0.996


No handles with labels found to put in legend.


epoch:19 | 0.116 - 0.997
epoch:0 | 0.099 - 0.135
epoch:1 | 0.116 - 0.616
epoch:2 | 0.116 - 0.705
epoch:3 | 0.116 - 0.771
epoch:4 | 0.116 - 0.853
epoch:5 | 0.116 - 0.927
epoch:6 | 0.116 - 0.956
epoch:7 | 0.116 - 0.971
epoch:8 | 0.116 - 0.985
epoch:9 | 0.116 - 0.988
epoch:10 | 0.116 - 0.991
epoch:11 | 0.116 - 0.997
epoch:12 | 0.116 - 0.976
epoch:13 | 0.116 - 0.999
epoch:14 | 0.116 - 1.0
epoch:15 | 0.116 - 1.0
epoch:16 | 0.116 - 1.0
epoch:17 | 0.116 - 1.0
epoch:18 | 0.116 - 1.0


No handles with labels found to put in legend.


epoch:19 | 0.116 - 1.0
epoch:0 | 0.087 - 0.119
epoch:1 | 0.117 - 0.537
epoch:2 | 0.117 - 0.67
epoch:3 | 0.116 - 0.746
epoch:4 | 0.116 - 0.762
epoch:5 | 0.116 - 0.764
epoch:6 | 0.116 - 0.8
epoch:7 | 0.116 - 0.801
epoch:8 | 0.116 - 0.807
epoch:9 | 0.116 - 0.847
epoch:10 | 0.116 - 0.865
epoch:11 | 0.116 - 0.871
epoch:12 | 0.116 - 0.935
epoch:13 | 0.116 - 0.939
epoch:14 | 0.116 - 0.966
epoch:15 | 0.116 - 0.977
epoch:16 | 0.117 - 0.987
epoch:17 | 0.117 - 0.989
epoch:18 | 0.117 - 0.99


No handles with labels found to put in legend.


epoch:19 | 0.116 - 0.991
epoch:0 | 0.094 - 0.119
epoch:1 | 0.116 - 0.5
epoch:2 | 0.117 - 0.675
epoch:3 | 0.117 - 0.653
epoch:4 | 0.117 - 0.705
epoch:5 | 0.117 - 0.762
epoch:6 | 0.117 - 0.725
epoch:7 | 0.116 - 0.808
epoch:8 | 0.116 - 0.871
epoch:9 | 0.116 - 0.761
epoch:10 | 0.116 - 0.933
epoch:11 | 0.116 - 0.915
epoch:12 | 0.116 - 0.973
epoch:13 | 0.116 - 0.968
epoch:14 | 0.116 - 0.981
epoch:15 | 0.116 - 0.953
epoch:16 | 0.116 - 0.985
epoch:17 | 0.116 - 0.988
epoch:18 | 0.116 - 0.987


No handles with labels found to put in legend.


epoch:19 | 0.116 - 0.995
epoch:0 | 0.1 - 0.109
epoch:1 | 0.117 - 0.471
epoch:2 | 0.117 - 0.621
epoch:3 | 0.117 - 0.669
epoch:4 | 0.117 - 0.671
epoch:5 | 0.117 - 0.672
epoch:6 | 0.117 - 0.753
epoch:7 | 0.117 - 0.754
epoch:8 | 0.117 - 0.737
epoch:9 | 0.117 - 0.767
epoch:10 | 0.117 - 0.739
epoch:11 | 0.117 - 0.787
epoch:12 | 0.117 - 0.788
epoch:13 | 0.117 - 0.798
epoch:14 | 0.117 - 0.845
epoch:15 | 0.117 - 0.866
epoch:16 | 0.117 - 0.911
epoch:17 | 0.117 - 0.965
epoch:18 | 0.117 - 0.969


No handles with labels found to put in legend.


epoch:19 | 0.117 - 0.986
epoch:0 | 0.1 - 0.197
epoch:1 | 0.117 - 0.486
epoch:2 | 0.117 - 0.519
epoch:3 | 0.116 - 0.574
epoch:4 | 0.116 - 0.577
epoch:5 | 0.116 - 0.653
epoch:6 | 0.117 - 0.68
epoch:7 | 0.116 - 0.7
epoch:8 | 0.117 - 0.678
epoch:9 | 0.117 - 0.703
epoch:10 | 0.117 - 0.744
epoch:11 | 0.117 - 0.804
epoch:12 | 0.117 - 0.799
epoch:13 | 0.117 - 0.737
epoch:14 | 0.117 - 0.807
epoch:15 | 0.117 - 0.805
epoch:16 | 0.117 - 0.806
epoch:17 | 0.117 - 0.812
epoch:18 | 0.117 - 0.813


No handles with labels found to put in legend.


epoch:19 | 0.117 - 0.814
epoch:0 | 0.1 - 0.1
epoch:1 | 0.116 - 0.261
epoch:2 | 0.116 - 0.464
epoch:3 | 0.116 - 0.38
epoch:4 | 0.116 - 0.479
epoch:5 | 0.116 - 0.489
epoch:6 | 0.117 - 0.474
epoch:7 | 0.116 - 0.472
epoch:8 | 0.116 - 0.503
epoch:9 | 0.116 - 0.491
epoch:10 | 0.116 - 0.504
epoch:11 | 0.116 - 0.471
epoch:12 | 0.116 - 0.516
epoch:13 | 0.116 - 0.474
epoch:14 | 0.116 - 0.453
epoch:15 | 0.116 - 0.484
epoch:16 | 0.116 - 0.481
epoch:17 | 0.116 - 0.437


No handles with labels found to put in legend.


epoch:18 | 0.117 - 0.443
epoch:19 | 0.117 - 0.411
epoch:0 | 0.116 - 0.122
epoch:1 | 0.116 - 0.301
epoch:2 | 0.116 - 0.407
epoch:3 | 0.117 - 0.409
epoch:4 | 0.117 - 0.413
epoch:5 | 0.117 - 0.495
epoch:6 | 0.117 - 0.481
epoch:7 | 0.117 - 0.507
epoch:8 | 0.117 - 0.513
epoch:9 | 0.117 - 0.519
epoch:10 | 0.117 - 0.509
epoch:11 | 0.117 - 0.511
epoch:12 | 0.117 - 0.52
epoch:13 | 0.117 - 0.519
epoch:14 | 0.117 - 0.493
epoch:15 | 0.117 - 0.492
epoch:16 | 0.117 - 0.506
epoch:17 | 0.117 - 0.507
epoch:18 | 0.117 - 0.499
epoch:19 | 0.117 - 0.51


No handles with labels found to put in legend.


epoch:0 | 0.092 - 0.156
epoch:1 | 0.117 - 0.233
epoch:2 | 0.116 - 0.378
epoch:3 | 0.117 - 0.41
epoch:4 | 0.117 - 0.438
epoch:5 | 0.117 - 0.426
epoch:6 | 0.117 - 0.43
epoch:7 | 0.117 - 0.421
epoch:8 | 0.117 - 0.43
epoch:9 | 0.117 - 0.431
epoch:10 | 0.117 - 0.395
epoch:11 | 0.117 - 0.421
epoch:12 | 0.117 - 0.53
epoch:13 | 0.117 - 0.513
epoch:14 | 0.117 - 0.507
epoch:15 | 0.117 - 0.508
epoch:16 | 0.117 - 0.529
epoch:17 | 0.117 - 0.533
epoch:18 | 0.117 - 0.614
epoch:19 | 0.117 - 0.621


<Figure size 640x480 with 16 Axes>

In [8]:
"""

============== 1/16 ==============
epoch:0 | 0.093 - 0.094
/Users/csg/HomeWorkspace/git/DL-scratch1/common/layers.py:12: RuntimeWarning: invalid value encountered in less_equal
  self.mask = (x <= 0)
/Users/csg/HomeWorkspace/git/DL-scratch1/common/multi_layer_net_extend.py:104: RuntimeWarning: overflow encountered in square
  weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)
/Users/csg/HomeWorkspace/git/DL-scratch1/common/multi_layer_net_extend.py:104: RuntimeWarning: invalid value encountered in double_scalars
  weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)
epoch:1 | 0.097 - 0.107
epoch:2 | 0.097 - 0.137
epoch:3 | 0.097 - 0.165
epoch:4 | 0.097 - 0.183
epoch:5 | 0.097 - 0.205
epoch:6 | 0.097 - 0.227
epoch:7 | 0.097 - 0.247
epoch:8 | 0.097 - 0.27
epoch:9 | 0.097 - 0.287
epoch:10 | 0.097 - 0.309
epoch:11 | 0.097 - 0.324
epoch:12 | 0.097 - 0.345
epoch:13 | 0.097 - 0.345
epoch:14 | 0.097 - 0.367
epoch:15 | 0.097 - 0.384
epoch:16 | 0.097 - 0.387
epoch:17 | 0.097 - 0.407
epoch:18 | 0.097 - 0.409
No handles with labels found to put in legend.
epoch:19 | 0.097 - 0.421
============== 2/16 ==============
epoch:0 | 0.094 - 0.105
/Users/csg/HomeWorkspace/git/DL-scratch1/common/layers.py:12: RuntimeWarning: invalid value encountered in less_equal
  self.mask = (x <= 0)
/Users/csg/HomeWorkspace/git/DL-scratch1/common/multi_layer_net_extend.py:104: RuntimeWarning: overflow encountered in square
  weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)
/Users/csg/HomeWorkspace/git/DL-scratch1/common/multi_layer_net_extend.py:104: RuntimeWarning: invalid value encountered in double_scalars
  weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)
epoch:1 | 0.097 - 0.088
epoch:2 | 0.097 - 0.095
epoch:3 | 0.097 - 0.118
epoch:4 | 0.097 - 0.142
epoch:5 | 0.097 - 0.151
epoch:6 | 0.097 - 0.192
epoch:7 | 0.097 - 0.21
epoch:8 | 0.097 - 0.228
epoch:9 | 0.097 - 0.251
epoch:10 | 0.097 - 0.271
epoch:11 | 0.097 - 0.3
epoch:12 | 0.097 - 0.322
epoch:13 | 0.097 - 0.348
epoch:14 | 0.097 - 0.361
epoch:15 | 0.097 - 0.392
epoch:16 | 0.097 - 0.401
epoch:17 | 0.097 - 0.422
epoch:18 | 0.097 - 0.442
No handles with labels found to put in legend.
epoch:19 | 0.097 - 0.459
============== 3/16 ==============
epoch:0 | 0.146 - 0.078
epoch:1 | 0.305 - 0.103
epoch:2 | 0.461 - 0.149
epoch:3 | 0.558 - 0.173
epoch:4 | 0.633 - 0.193
epoch:5 | 0.673 - 0.231
epoch:6 | 0.743 - 0.258
epoch:7 | 0.777 - 0.298
epoch:8 | 0.822 - 0.332
epoch:9 | 0.848 - 0.364
epoch:10 | 0.866 - 0.399
epoch:11 | 0.885 - 0.44
epoch:12 | 0.902 - 0.471
epoch:13 | 0.919 - 0.507
epoch:14 | 0.927 - 0.525
epoch:15 | 0.934 - 0.543
epoch:16 | 0.939 - 0.57
epoch:17 | 0.953 - 0.586
epoch:18 | 0.961 - 0.609
No handles with labels found to put in legend.
epoch:19 | 0.966 - 0.626
============== 4/16 ==============
epoch:0 | 0.098 - 0.117
epoch:1 | 0.226 - 0.135
epoch:2 | 0.378 - 0.183
epoch:3 | 0.469 - 0.248
epoch:4 | 0.545 - 0.31
epoch:5 | 0.607 - 0.369
epoch:6 | 0.64 - 0.439
epoch:7 | 0.664 - 0.481
epoch:8 | 0.713 - 0.519
epoch:9 | 0.729 - 0.571
epoch:10 | 0.761 - 0.59
epoch:11 | 0.766 - 0.63
epoch:12 | 0.781 - 0.655
epoch:13 | 0.802 - 0.678
epoch:14 | 0.809 - 0.698
epoch:15 | 0.823 - 0.72
epoch:16 | 0.84 - 0.734
epoch:17 | 0.839 - 0.757
epoch:18 | 0.852 - 0.773
No handles with labels found to put in legend.
epoch:19 | 0.858 - 0.78
============== 5/16 ==============
epoch:0 | 0.115 - 0.102
epoch:1 | 0.126 - 0.143
epoch:2 | 0.13 - 0.266
epoch:3 | 0.143 - 0.411
epoch:4 | 0.159 - 0.5
epoch:5 | 0.174 - 0.579
epoch:6 | 0.183 - 0.628
epoch:7 | 0.186 - 0.681
epoch:8 | 0.196 - 0.702
epoch:9 | 0.201 - 0.736
epoch:10 | 0.206 - 0.766
epoch:11 | 0.211 - 0.789
epoch:12 | 0.215 - 0.809
epoch:13 | 0.221 - 0.826
epoch:14 | 0.222 - 0.835
epoch:15 | 0.237 - 0.844
epoch:16 | 0.236 - 0.861
epoch:17 | 0.238 - 0.872
epoch:18 | 0.25 - 0.874
No handles with labels found to put in legend.
epoch:19 | 0.247 - 0.881
============== 6/16 ==============
epoch:0 | 0.093 - 0.128
epoch:1 | 0.116 - 0.271
epoch:2 | 0.135 - 0.424
epoch:3 | 0.122 - 0.545
epoch:4 | 0.116 - 0.639
epoch:5 | 0.116 - 0.707
epoch:6 | 0.116 - 0.762
epoch:7 | 0.116 - 0.786
epoch:8 | 0.116 - 0.812
epoch:9 | 0.116 - 0.84
epoch:10 | 0.116 - 0.858
epoch:11 | 0.116 - 0.876
epoch:12 | 0.116 - 0.888
epoch:13 | 0.116 - 0.903
epoch:14 | 0.116 - 0.911
epoch:15 | 0.116 - 0.919
epoch:16 | 0.116 - 0.925
epoch:17 | 0.116 - 0.93
epoch:18 | 0.116 - 0.937
No handles with labels found to put in legend.
epoch:19 | 0.116 - 0.944
============== 7/16 ==============
epoch:0 | 0.097 - 0.163
epoch:1 | 0.117 - 0.271
epoch:2 | 0.117 - 0.589
epoch:3 | 0.117 - 0.678
epoch:4 | 0.117 - 0.734
epoch:5 | 0.117 - 0.769
epoch:6 | 0.117 - 0.813
epoch:7 | 0.117 - 0.841
epoch:8 | 0.117 - 0.872
epoch:9 | 0.117 - 0.894
epoch:10 | 0.117 - 0.917
epoch:11 | 0.117 - 0.935
epoch:12 | 0.117 - 0.945
epoch:13 | 0.117 - 0.957
epoch:14 | 0.117 - 0.966
epoch:15 | 0.117 - 0.969
epoch:16 | 0.117 - 0.975
epoch:17 | 0.117 - 0.978
epoch:18 | 0.117 - 0.983
No handles with labels found to put in legend.
epoch:19 | 0.117 - 0.985
============== 8/16 ==============
epoch:0 | 0.1 - 0.136
epoch:1 | 0.117 - 0.4
epoch:2 | 0.116 - 0.66
epoch:3 | 0.116 - 0.753
epoch:4 | 0.116 - 0.796
epoch:5 | 0.116 - 0.843
epoch:6 | 0.116 - 0.863
epoch:7 | 0.116 - 0.881
epoch:8 | 0.116 - 0.907
epoch:9 | 0.116 - 0.927
epoch:10 | 0.116 - 0.953
epoch:11 | 0.116 - 0.97
epoch:12 | 0.116 - 0.981
epoch:13 | 0.116 - 0.992
epoch:14 | 0.116 - 0.992
epoch:15 | 0.116 - 0.994
epoch:16 | 0.116 - 0.996
epoch:17 | 0.116 - 0.996
epoch:18 | 0.116 - 0.996
No handles with labels found to put in legend.
epoch:19 | 0.116 - 0.997
============== 9/16 ==============
epoch:0 | 0.099 - 0.135
epoch:1 | 0.116 - 0.616
epoch:2 | 0.116 - 0.705
epoch:3 | 0.116 - 0.771
epoch:4 | 0.116 - 0.853
epoch:5 | 0.116 - 0.927
epoch:6 | 0.116 - 0.956
epoch:7 | 0.116 - 0.971
epoch:8 | 0.116 - 0.985
epoch:9 | 0.116 - 0.988
epoch:10 | 0.116 - 0.991
epoch:11 | 0.116 - 0.997
epoch:12 | 0.116 - 0.976
epoch:13 | 0.116 - 0.999
epoch:14 | 0.116 - 1.0
epoch:15 | 0.116 - 1.0
epoch:16 | 0.116 - 1.0
epoch:17 | 0.116 - 1.0
epoch:18 | 0.116 - 1.0
No handles with labels found to put in legend.
epoch:19 | 0.116 - 1.0
============== 10/16 ==============
epoch:0 | 0.087 - 0.119
epoch:1 | 0.117 - 0.537
epoch:2 | 0.117 - 0.67
epoch:3 | 0.116 - 0.746
epoch:4 | 0.116 - 0.762
epoch:5 | 0.116 - 0.764
epoch:6 | 0.116 - 0.8
epoch:7 | 0.116 - 0.801
epoch:8 | 0.116 - 0.807
epoch:9 | 0.116 - 0.847
epoch:10 | 0.116 - 0.865
epoch:11 | 0.116 - 0.871
epoch:12 | 0.116 - 0.935
epoch:13 | 0.116 - 0.939
epoch:14 | 0.116 - 0.966
epoch:15 | 0.116 - 0.977
epoch:16 | 0.117 - 0.987
epoch:17 | 0.117 - 0.989
epoch:18 | 0.117 - 0.99
No handles with labels found to put in legend.
epoch:19 | 0.116 - 0.991
============== 11/16 ==============
epoch:0 | 0.094 - 0.119
epoch:1 | 0.116 - 0.5
epoch:2 | 0.117 - 0.675
epoch:3 | 0.117 - 0.653
epoch:4 | 0.117 - 0.705
epoch:5 | 0.117 - 0.762
epoch:6 | 0.117 - 0.725
epoch:7 | 0.116 - 0.808
epoch:8 | 0.116 - 0.871
epoch:9 | 0.116 - 0.761
epoch:10 | 0.116 - 0.933
epoch:11 | 0.116 - 0.915
epoch:12 | 0.116 - 0.973
epoch:13 | 0.116 - 0.968
epoch:14 | 0.116 - 0.981
epoch:15 | 0.116 - 0.953
epoch:16 | 0.116 - 0.985
epoch:17 | 0.116 - 0.988
epoch:18 | 0.116 - 0.987
No handles with labels found to put in legend.
epoch:19 | 0.116 - 0.995
============== 12/16 ==============
epoch:0 | 0.1 - 0.109
epoch:1 | 0.117 - 0.471
epoch:2 | 0.117 - 0.621
epoch:3 | 0.117 - 0.669
epoch:4 | 0.117 - 0.671
epoch:5 | 0.117 - 0.672
epoch:6 | 0.117 - 0.753
epoch:7 | 0.117 - 0.754
epoch:8 | 0.117 - 0.737
epoch:9 | 0.117 - 0.767
epoch:10 | 0.117 - 0.739
epoch:11 | 0.117 - 0.787
epoch:12 | 0.117 - 0.788
epoch:13 | 0.117 - 0.798
epoch:14 | 0.117 - 0.845
epoch:15 | 0.117 - 0.866
epoch:16 | 0.117 - 0.911
epoch:17 | 0.117 - 0.965
epoch:18 | 0.117 - 0.969
No handles with labels found to put in legend.
epoch:19 | 0.117 - 0.986
============== 13/16 ==============
epoch:0 | 0.1 - 0.197
epoch:1 | 0.117 - 0.486
epoch:2 | 0.117 - 0.519
epoch:3 | 0.116 - 0.574
epoch:4 | 0.116 - 0.577
epoch:5 | 0.116 - 0.653
epoch:6 | 0.117 - 0.68
epoch:7 | 0.116 - 0.7
epoch:8 | 0.117 - 0.678
epoch:9 | 0.117 - 0.703
epoch:10 | 0.117 - 0.744
epoch:11 | 0.117 - 0.804
epoch:12 | 0.117 - 0.799
epoch:13 | 0.117 - 0.737
epoch:14 | 0.117 - 0.807
epoch:15 | 0.117 - 0.805
epoch:16 | 0.117 - 0.806
epoch:17 | 0.117 - 0.812
epoch:18 | 0.117 - 0.813
No handles with labels found to put in legend.
epoch:19 | 0.117 - 0.814
============== 14/16 ==============
epoch:0 | 0.1 - 0.1
epoch:1 | 0.116 - 0.261
epoch:2 | 0.116 - 0.464
epoch:3 | 0.116 - 0.38
epoch:4 | 0.116 - 0.479
epoch:5 | 0.116 - 0.489
epoch:6 | 0.117 - 0.474
epoch:7 | 0.116 - 0.472
epoch:8 | 0.116 - 0.503
epoch:9 | 0.116 - 0.491
epoch:10 | 0.116 - 0.504
epoch:11 | 0.116 - 0.471
epoch:12 | 0.116 - 0.516
epoch:13 | 0.116 - 0.474
epoch:14 | 0.116 - 0.453
epoch:15 | 0.116 - 0.484
epoch:16 | 0.116 - 0.481
epoch:17 | 0.116 - 0.437
No handles with labels found to put in legend.
epoch:18 | 0.117 - 0.443
epoch:19 | 0.117 - 0.411
============== 15/16 ==============
epoch:0 | 0.116 - 0.122
epoch:1 | 0.116 - 0.301
epoch:2 | 0.116 - 0.407
epoch:3 | 0.117 - 0.409
epoch:4 | 0.117 - 0.413
epoch:5 | 0.117 - 0.495
epoch:6 | 0.117 - 0.481
epoch:7 | 0.117 - 0.507
epoch:8 | 0.117 - 0.513
epoch:9 | 0.117 - 0.519
epoch:10 | 0.117 - 0.509
epoch:11 | 0.117 - 0.511
epoch:12 | 0.117 - 0.52
epoch:13 | 0.117 - 0.519
epoch:14 | 0.117 - 0.493
epoch:15 | 0.117 - 0.492
epoch:16 | 0.117 - 0.506
epoch:17 | 0.117 - 0.507
epoch:18 | 0.117 - 0.499
epoch:19 | 0.117 - 0.51
No handles with labels found to put in legend.
============== 16/16 ==============
epoch:0 | 0.092 - 0.156
epoch:1 | 0.117 - 0.233
epoch:2 | 0.116 - 0.378
epoch:3 | 0.117 - 0.41
epoch:4 | 0.117 - 0.438
epoch:5 | 0.117 - 0.426
epoch:6 | 0.117 - 0.43
epoch:7 | 0.117 - 0.421
epoch:8 | 0.117 - 0.43
epoch:9 | 0.117 - 0.431
epoch:10 | 0.117 - 0.395
epoch:11 | 0.117 - 0.421
epoch:12 | 0.117 - 0.53
epoch:13 | 0.117 - 0.513
epoch:14 | 0.117 - 0.507
epoch:15 | 0.117 - 0.508
epoch:16 | 0.117 - 0.529
epoch:17 | 0.117 - 0.533
epoch:18 | 0.117 - 0.614
epoch:19 | 0.117 - 0.621
<Figure size 640x480 with 16 Axes>
"""



# hyperparameter_optimization.py

In [9]:
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # 부모 디렉터리의 파일을 가져올 수 있도록 설정
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from common.multi_layer_net import MultiLayerNet
from common.util import shuffle_dataset
from common.trainer import Trainer

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

# Reduce the training data so results come back quickly.
x_train, t_train = x_train[:500], t_train[:500]

# Split off 20% of the shuffled samples as validation data.
validation_rate = 0.20
validation_num = int(x_train.shape[0] * validation_rate)
x_train, t_train = shuffle_dataset(x_train, t_train)
# Right-hand sides are evaluated before assignment, so both slices are taken
# from the full shuffled arrays.
x_val, x_train = x_train[:validation_num], x_train[validation_num:]
t_val, t_train = t_train[:validation_num], t_train[validation_num:]


def __train(lr, weight_decay, epocs=50):
    """Train one network with the given hyperparameters.

    Uses the module-level ``x_train`` / ``t_train`` for training and
    ``x_val`` / ``t_val`` for evaluation.

    Args:
        lr: Learning rate, handed to the optimizer via ``optimizer_param``.
        weight_decay: Forwarded to ``MultiLayerNet`` as ``weight_decay_lambda``.
        epocs: Forwarded to ``Trainer`` as ``epochs``.

    Returns:
        ``(trainer.test_acc_list, trainer.train_acc_list)``. The validation
        split is passed where the other scripts pass test data, so the first
        list presumably holds validation accuracy (confirm against Trainer).
    """
    net = MultiLayerNet(
        input_size=784,
        hidden_size_list=[100] * 6,
        output_size=10,
        weight_decay_lambda=weight_decay,
    )
    sgd_settings = {'lr': lr}
    trainer = Trainer(
        net, x_train, t_train, x_val, t_val,
        epochs=epocs,
        mini_batch_size=100,
        optimizer='sgd',
        optimizer_param=sgd_settings,
        verbose=False,
    )
    trainer.train()

    val_acc, train_acc = trainer.test_acc_list, trainer.train_acc_list
    return val_acc, train_acc


# Random search over hyperparameters ======================================
optimization_trial = 100
results_val = {}
results_train = {}
for _ in range(optimization_trial):
    # Search range: exponents are drawn uniformly, i.e. log-uniform values ====
    weight_decay = 10 ** np.random.uniform(-8, -4)
    lr = 10 ** np.random.uniform(-6, -2)
    # ================================================

    val_acc_list, train_acc_list = __train(lr, weight_decay)
    # The same description is used as the log text and as the result key.
    key = "lr:" + str(lr) + ", weight decay:" + str(weight_decay)
    print("val acc:" + str(val_acc_list[-1]) + " | " + key)
    results_val[key] = val_acc_list
    results_train[key] = train_acc_list

# Plot the best trials ========================================================
print("=========== Hyper-Parameter Optimization Result ===========")
graph_draw_num = 20
col_num = 5
row_num = int(np.ceil(graph_draw_num / col_num))

# Rank trials by their final validation accuracy, best first.
ranked = sorted(results_val.items(), key=lambda item: item[1][-1], reverse=True)

for i, (key, val_acc_list) in enumerate(ranked[:graph_draw_num]):
    print("Best-" + str(i+1) + "(val acc:" + str(val_acc_list[-1]) + ") | " + key)

    plt.subplot(row_num, col_num, i+1)
    plt.title("Best-" + str(i+1))
    plt.ylim(0.0, 1.0)
    # Keep y ticks only in the first column (was hard-coded as `i % 5`).
    if i % col_num:
        plt.yticks([])
    plt.xticks([])
    x = np.arange(len(val_acc_list))
    plt.plot(x, val_acc_list)
    plt.plot(x, results_train[key], "--")

plt.show()

val acc:0.23 | lr:0.00041573680063862624, weight decay:1.7421173695009184e-08
val acc:0.73 | lr:0.008059345570336525, weight decay:5.38942229384971e-07
val acc:0.11 | lr:8.086476915678654e-06, weight decay:2.388842811212531e-06
val acc:0.06 | lr:1.5004191392841905e-05, weight decay:1.478624936323425e-08
val acc:0.82 | lr:0.008810757614763862, weight decay:2.4710151564284837e-05
val acc:0.29 | lr:0.0020181433279115746, weight decay:2.215813011399626e-08
val acc:0.67 | lr:0.00544059305428972, weight decay:5.410682047313105e-06
val acc:0.09 | lr:8.564567123043342e-05, weight decay:3.709477327421879e-06
val acc:0.79 | lr:0.008759308709890798, weight decay:1.774108350135052e-06


KeyboardInterrupt: 

# optimizer_compare_mnist.py

In [None]:
# coding: utf-8
import os
import sys
sys.path.append(os.pardir)  # 부모 디렉터리의 파일을 가져올 수 있도록 설정
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from common.util import smooth_curve
from common.multi_layer_net import MultiLayerNet
from common.optimizer import *


# np is used throughout this script but its own import list never imports
# numpy; import it explicitly instead of depending on another import
# (presumably `from common.optimizer import *`) to provide the name.
import numpy as np

# 0. Load MNIST ==========
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

train_size = x_train.shape[0]
batch_size = 128
max_iterations = 2000


# 1. Experiment setup ==========
optimizers = {}
optimizers['SGD'] = SGD()
optimizers['Momentum'] = Momentum()
optimizers['AdaGrad'] = AdaGrad()
optimizers['Adam'] = Adam()
#optimizers['RMSprop'] = RMSprop()

# One independent network and one loss history per optimizer.
networks = {}
train_loss = {}
for key in optimizers:
    networks[key] = MultiLayerNet(
        input_size=784, hidden_size_list=[100, 100, 100, 100],
        output_size=10)
    train_loss[key] = []


# 2. Training ==========
for i in range(max_iterations):
    # Every optimizer sees the same randomly drawn mini-batch.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    for key, optimizer in optimizers.items():
        network = networks[key]
        grads = network.gradient(x_batch, t_batch)
        optimizer.update(network.params, grads)

        # Loss is recorded after the update, on the same mini-batch.
        loss = network.loss(x_batch, t_batch)
        train_loss[key].append(loss)

    if i % 100 == 0:
        print("===========" + "iteration:" + str(i) + "===========")
        for key in optimizers:
            loss = networks[key].loss(x_batch, t_batch)
            print(key + ":" + str(loss))


# 3. Plot ==========
markers = {"SGD": "o", "Momentum": "x", "AdaGrad": "s", "Adam": "D"}
x = np.arange(max_iterations)
for key in optimizers:
    plt.plot(x, smooth_curve(train_loss[key]), marker=markers[key], markevery=100, label=key)
plt.xlabel("iterations")
plt.ylabel("loss")
plt.ylim(0, 1)
plt.legend()
plt.show()

# optimizer_compare_naive.py

In [None]:
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # 부모 디렉터리의 파일을 가져올 수 있도록 설정
import numpy as np
import matplotlib.pyplot as plt
from collections import OrderedDict
from common.optimizer import *


def f(x, y):
    """Evaluate the test function ``x**2 / 20 + y**2``."""
    x_term = x**2 / 20.0
    y_term = y**2
    return x_term + y_term


def df(x, y):
    """Return the gradient of ``x**2 / 20 + y**2`` as ``(d/dx, d/dy)``."""
    grad_x = x / 10.0
    grad_y = 2.0 * y
    return grad_x, grad_y

# Every optimizer starts from the same point.
init_pos = (-7.0, 2.0)
params = {'x': init_pos[0], 'y': init_pos[1]}
grads = {'x': 0, 'y': 0}


optimizers = OrderedDict()
optimizers["SGD"] = SGD(lr=0.95)
optimizers["Momentum"] = Momentum(lr=0.1)
optimizers["AdaGrad"] = AdaGrad(lr=1.5)
optimizers["Adam"] = Adam(lr=0.3)

# The contour grid does not depend on the optimizer, so build it once
# instead of once per loop iteration.
x = np.arange(-10, 10, 0.01)
y = np.arange(-5, 5, 0.01)

X, Y = np.meshgrid(x, y)
Z = f(X, Y)

# Simplify the outer contour lines: values above 7 are set to 0.
mask = Z > 7
Z[mask] = 0

for idx, (key, optimizer) in enumerate(optimizers.items(), start=1):
    x_history = []
    y_history = []
    # Reset to the starting point for each optimizer.
    params['x'], params['y'] = init_pos[0], init_pos[1]

    # Record the position, then take one update step; 30 steps in total.
    for _ in range(30):
        x_history.append(params['x'])
        y_history.append(params['y'])

        grads['x'], grads['y'] = df(params['x'], params['y'])
        optimizer.update(params, grads)

    # One subplot per optimizer in a 2x2 grid.
    plt.subplot(2, 2, idx)
    plt.plot(x_history, y_history, 'o-', color="red")
    plt.contour(X, Y, Z)
    plt.ylim(-10, 10)
    plt.xlim(-10, 10)
    plt.plot(0, 0, '+')
    plt.title(key)
    plt.xlabel("x")
    plt.ylabel("y")

plt.show()

# overfit_dropout.py

In [None]:
# coding: utf-8
import os
import sys
sys.path.append(os.pardir)  # 부모 디렉터리의 파일을 가져올 수 있도록 설정
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from common.multi_layer_net_extend import MultiLayerNetExtend
from common.trainer import Trainer

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

# Reduce the number of training samples to reproduce overfitting.
x_train, t_train = x_train[:300], t_train[:300]

# Dropout on/off and ratio ========================
use_dropout = True  # set to False to train without dropout
dropout_ratio = 0.2
# ====================================================

# NOTE(review): the keyword is spelled `dropout_ration`; presumably this
# matches MultiLayerNetExtend's parameter name - confirm before renaming.
network = MultiLayerNetExtend(
    input_size=784,
    hidden_size_list=[100] * 6,
    output_size=10,
    use_dropout=use_dropout,
    dropout_ration=dropout_ratio,
)
trainer = Trainer(
    network, x_train, t_train, x_test, t_test,
    epochs=301,
    mini_batch_size=100,
    optimizer='sgd',
    optimizer_param={'lr': 0.01},
    verbose=True,
)
trainer.train()

train_acc_list, test_acc_list = trainer.train_acc_list, trainer.test_acc_list

# Plot ==========
markers = {'train': 'o', 'test': 's'}
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, marker=markers['train'], label='train', markevery=10)
plt.plot(x, test_acc_list, marker=markers['test'], label='test', markevery=10)
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()

# overfit_weight_decay.py

In [None]:
# coding: utf-8
import os
import sys

sys.path.append(os.pardir)  # 부모 디렉터리의 파일을 가져올 수 있도록 설정
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from common.multi_layer_net import MultiLayerNet
from common.optimizer import SGD

# Load MNIST; `normalize=True` is forwarded to the loader as-is.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

# Keep only the first 300 training samples so overfitting is easy to reproduce.
x_train = x_train[:300]
t_train = t_train[:300]

# Weight decay setting ==================================
# weight_decay_lambda = 0  # use this line to disable weight decay
weight_decay_lambda = 0.1
# ====================================================

network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100, 100], output_size=10,
                        weight_decay_lambda=weight_decay_lambda)
optimizer = SGD(lr=0.01)  # update parameters with SGD, learning rate 0.01

max_epochs = 201
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []  # kept for compatibility; this script never appends to it
train_acc_list = []
test_acc_list = []

# Use an integer iteration count per epoch. With true division the value is a
# float, and `i % iter_per_epoch == 0` would (almost) never hold again after
# i == 0 whenever train_size is not an exact multiple of batch_size, so the
# loop below would run on without ever reaching max_epochs.
iter_per_epoch = max(train_size // batch_size, 1)
epoch_cnt = 0

# The range is only an upper bound; the loop exits via `break` once
# max_epochs evaluations have been recorded.
for i in range(1000000000):
    # Sample a mini-batch (indices are drawn with replacement).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grads = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grads)

    # Once per epoch: record accuracy on the reduced training set and the test set.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)

        print("epoch:" + str(epoch_cnt) + ", train acc:" + str(train_acc) + ", test acc:" + str(test_acc))

        epoch_cnt += 1
        if epoch_cnt >= max_epochs:
            break


# Plot the accuracy curves ==========
markers = {'train': 'o', 'test': 's'}
# Build the x axis from what was actually recorded so that x and y always
# have matching lengths.
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, marker=markers['train'], label='train', markevery=10)
plt.plot(x, test_acc_list, marker=markers['test'], label='test', markevery=10)
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()

# weight_init_activation_histogram.py 

In [None]:
# coding: utf-8
import numpy as np
import matplotlib.pyplot as plt


def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)), applied element-wise by NumPy."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator


def ReLU(x):
    """Return the element-wise maximum of 0 and x (rectified linear unit)."""
    lower_bound = 0
    rectified = np.maximum(lower_bound, x)
    return rectified


def tanh(x):
    """Return the hyperbolic tangent of x; a thin wrapper around np.tanh."""
    squashed = np.tanh(x)
    return squashed
    
input_data = np.random.randn(1000, 100)  # 1000 samples, 100 values each
node_num = 100  # number of nodes (neurons) in each hidden layer
hidden_layer_size = 5  # five hidden layers
activations = {}  # layer index -> activation output of that layer

x = input_data

for layer in range(hidden_layer_size):
    # From the second layer on, the input is the previous layer's output.
    if layer > 0:
        x = activations[layer - 1]

    # Experiment with different weight initializations!
    w = np.random.randn(node_num, node_num) * 1
    # w = np.random.randn(node_num, node_num) * 0.01
    # w = np.random.randn(node_num, node_num) * np.sqrt(1.0 / node_num)
    # w = np.random.randn(node_num, node_num) * np.sqrt(2.0 / node_num)

    # Weighted sum for this layer (no bias term is used in this experiment).
    a = x @ w

    # Experiment with different activation functions too!
    z = sigmoid(a)
    # z = ReLU(a)
    # z = tanh(a)

    activations[layer] = z

# Draw one histogram per layer, side by side in a single row.
n_layers = len(activations)
for layer, layer_out in activations.items():
    plt.subplot(1, n_layers, layer + 1)
    plt.title(str(layer + 1) + "-layer")
    if layer != 0:
        # Hide y tick labels on every subplot except the first.
        plt.yticks([], [])
    # plt.xlim(0.1, 1)
    # plt.ylim(0, 7000)
    plt.hist(layer_out.flatten(), 30, range=(0, 1))
plt.show()

In [None]:
# weight_init_compare.py

In [None]:
# coding: utf-8
import os
import sys

sys.path.append(os.pardir)  # 부모 디렉터리의 파일을 가져올 수 있도록 설정
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from common.util import smooth_curve
from common.multi_layer_net import MultiLayerNet
from common.optimizer import SGD


# 0. Load the MNIST data ==========
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

train_size = x_train.shape[0]
batch_size = 128
max_iterations = 2000


# 1. Experiment settings ==========
# Label -> value passed as `weight_init_std` to each network.
weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}
optimizer = SGD(lr=0.01)

# One network and one loss history per initialization scheme.
networks = {
    name: MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100],
                        output_size=10, weight_init_std=init)
    for name, init in weight_init_types.items()
}
train_loss = {name: [] for name in weight_init_types}


# 2. Training ==========
for step in range(max_iterations):
    # Every network sees the same mini-batch so the comparison is fair.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch, t_batch = x_train[batch_mask], t_train[batch_mask]

    for name, net in networks.items():
        grads = net.gradient(x_batch, t_batch)
        optimizer.update(net.params, grads)

        # Loss on the current mini-batch, measured after the update.
        train_loss[name].append(net.loss(x_batch, t_batch))

    # Report progress only every 100 iterations.
    if step % 100 != 0:
        continue
    print("===========" + "iteration:" + str(step) + "===========")
    for name, net in networks.items():
        print(name + ":" + str(net.loss(x_batch, t_batch)))


# 3. Plot the smoothed loss curves ==========
markers = {'std=0.01': 'o', 'Xavier': 's', 'He': 'D'}
x = np.arange(max_iterations)
for name, losses in train_loss.items():
    plt.plot(x, smooth_curve(losses), marker=markers[name], markevery=100, label=name)
plt.xlabel("iterations")
plt.ylabel("loss")
plt.ylim(0, 2.5)
plt.legend()
plt.show()