In [None]:
# 各種正則化機能を確認していく
# Early Stopping：簡単な実装が見つからない。scikit-learnにはあるし、kerasでは簡単だし何で？
# L1正則化：L1ノルム(マンハッタンノルム)
# L2正則化：L2ノルム(ユークリッドノルム)
# おさらいとしてLx の添え字が大きくなるほど大きな値を重視し、小さな値を無視する傾向になる
# ドロップアウト
# 重み上限正則化：入力接続の重みをL2ノルムを使用して制限をかける
# データ拡張：訓練中に訓練セットに対し、平行移動、回転、サイズ変更、反転、トリミングなどをかける


# 実践的には以下を最初に試すのがよい
# 初期値：Heの初期値
# 活性化関数：ELU(SELU?もあり)
# 正規化：バッチ正規化
# 正則化：ドロップアウト このページで実装あり
# オプティマイザ：NAG
# 学習率のスケジューリング：なし

In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    """Start from a fresh default graph with reproducible random state.

    Note: ``tf`` is imported in a later cell of this notebook, so that cell
    must have been executed before this function is called.

    Args:
        seed: Integer seed applied to NumPy's global RNG and to TensorFlow's
            random seed.
    """
    # NumPy seeding does not touch the graph, so it can happen first.
    np.random.seed(seed)
    # Seed TensorFlow only after the reset, keeping the original call order.
    tf.reset_default_graph()
    tf.set_random_seed(seed)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
# Font sizes for axis labels and tick labels on every figure.
plt.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

# Figures are saved under <PROJECT_ROOT_DIR>/images/<CHAPTER_ID>/.
PROJECT_ROOT_DIR, CHAPTER_ID = ".", "deep"

def save_fig(fig_id, tight_layout=True):
    """Save the current matplotlib figure as a 300-dpi PNG.

    The file is written to
    ``<PROJECT_ROOT_DIR>/images/<CHAPTER_ID>/<fig_id>.png``. The target
    directory is created when it does not exist yet, so the function also
    works on a fresh checkout.

    Args:
        fig_id: Base name of the output file (without the ``.png`` suffix).
        tight_layout: When true, ``plt.tight_layout()`` is called before
            the figure is saved.
    """
    images_dir = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
    # savefig does not create missing parent directories; do it here.
    # (isdir + makedirs instead of exist_ok=True keeps Python 2 working.)
    if not os.path.isdir(images_dir):
        os.makedirs(images_dir)
    path = os.path.join(images_dir, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)

In [2]:
# いつものMNIST
import numpy as np
import tensorflow as tf
# Load MNIST through the Keras dataset helper.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten every image to a 784-long float32 vector and divide by 255.0
# (presumably mapping 0-255 pixel values into [0, 1]).
X_train = X_train.reshape(-1, 28 * 28).astype(np.float32) / 255.0
X_test = X_test.reshape(-1, 28 * 28).astype(np.float32) / 255.0
y_train, y_test = y_train.astype(np.int32), y_test.astype(np.int32)

# Hold out the first 5000 training samples as the validation set.
X_valid, y_valid = X_train[:5000], y_train[:5000]
X_train, y_train = X_train[5000:], y_train[5000:]

In [3]:
# The usual random mini-batch generator.
def shuffle_batch(X, y, batch_size):
    """Yield randomly shuffled mini-batches of ``(X, y)``.

    The samples are permuted with NumPy's global RNG and split into
    ``len(X) // batch_size`` nearly equal parts, so every sample is used
    exactly once per call and batches may be slightly larger than
    ``batch_size``. When the dataset holds fewer than ``batch_size``
    samples, a single batch with all samples is produced instead of
    failing; an empty dataset yields nothing.

    Args:
        X: Indexable array of inputs (supports integer-array indexing).
        y: Indexable array of targets, aligned with ``X``.
        batch_size: Target number of samples per batch.

    Yields:
        Tuples ``(X_batch, y_batch)``.
    """
    n_samples = len(X)
    if n_samples == 0:
        return
    rnd_idx = np.random.permutation(n_samples)
    # np.array_split rejects 0 sections, which is what the integer division
    # gives when the dataset is smaller than one batch.
    n_batches = max(1, n_samples // batch_size)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        yield X[batch_idx], y[batch_idx]

In [4]:
# Start from an empty default graph with freshly seeded RNGs.
reset_graph()

# Layer sizes of the network.
n_inputs = 28 * 28  # MNIST
n_hidden1, n_hidden2 = 300, 50
n_outputs = 10

# Input features: any batch size, n_inputs values per sample.
X = tf.placeholder(dtype=tf.float32, shape=[None, n_inputs], name="X")
# Labels: shape=None leaves the placeholder's shape unspecified.
y = tf.placeholder(dtype=tf.int32, shape=None, name="y")

In [5]:
# Hyperparameter of the L1 regularization: the scale passed to the L1 regularizer.
scale = 0.001

In [6]:
# Dense-layer factory: functools.partial pre-binds the ReLU activation and an
# L1 kernel regularizer (strength `scale`), so each layer below only has to
# pass its input, its size and its name.
from functools import partial
my_dense_layer = partial(
    tf.layers.dense,
    kernel_regularizer=tf.contrib.layers.l1_regularizer(scale),
    activation=tf.nn.relu)

# Dropout configuration.
dropout_rate = 0.5  # == 1 - keep_prob
# Defaults to False; the training loop feeds True to switch dropout on.
training = tf.placeholder_with_default(False, shape=(), name='training')
# Dropout is applied to the raw inputs as well.
X_drop = tf.layers.dropout(inputs=X, rate=dropout_rate, training=training)

with tf.name_scope("dnn"):
    # Each dense layer is followed by a dropout layer that feeds the next one.
    hidden1 = my_dense_layer(X_drop, n_hidden1, name="hidden1")
    hidden1_drop = tf.layers.dropout(
        inputs=hidden1, rate=dropout_rate, training=training)
    hidden2 = my_dense_layer(hidden1_drop, n_hidden2, name="hidden2")
    hidden2_drop = tf.layers.dropout(
        inputs=hidden2, rate=dropout_rate, training=training)
    # Output layer: override the pre-bound ReLU so raw logits come out.
    logits = my_dense_layer(hidden2_drop, n_outputs, name="outputs",
                            activation=None)
    
    



For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Use keras.layers.dropout instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.dense instead.


In [7]:
# Add the regularization terms to the overall loss.
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=y)
    base_loss = tf.reduce_mean(xentropy, name="avg_xentropy")
    # Everything stored in the REGULARIZATION_LOSSES collection; presumably
    # the penalties registered by my_dense_layer's kernel_regularizer.
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    # Total loss = mean cross-entropy + all collected regularization losses.
    loss = tf.add_n(inputs=[base_loss] + reg_losses, name="loss")

In [8]:
with tf.name_scope("eval"):
    # Per-sample hit/miss: is the label the top-1 prediction?
    correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    # Mean of the hits cast to float, i.e. the accuracy.
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

learning_rate = 0.01

with tf.name_scope("train"):
    # Plain gradient descent on the regularized loss.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

# Initializer op and checkpoint saver used by the training loop.
init = tf.global_variables_initializer()
saver = tf.train.Saver()



In [9]:
n_epochs = 20
batch_size = 200

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            # training=True switches the dropout layers on for this step.
            sess.run(training_op,
                     feed_dict={X: X_batch, y: y_batch, training: True})
        # `training` is not fed here, so it falls back to its default (False).
        accuracy_val = sess.run(accuracy, feed_dict={X: X_valid, y: y_valid})
        print(epoch, "Validation accuracy:", accuracy_val)

    # Checkpoint the final weights.
    save_path = saver.save(sess, "./my_model_final.ckpt")

# Note: regularizing too strongly naturally hurts accuracy. Can the trade-off
# against overfitting only be measured on test data?

0 Validation accuracy: 0.6958
1 Validation accuracy: 0.766
2 Validation accuracy: 0.818
3 Validation accuracy: 0.8374
4 Validation accuracy: 0.8492
5 Validation accuracy: 0.8568
6 Validation accuracy: 0.8642
7 Validation accuracy: 0.8698
8 Validation accuracy: 0.8738
9 Validation accuracy: 0.8802
10 Validation accuracy: 0.883
11 Validation accuracy: 0.886
12 Validation accuracy: 0.886
13 Validation accuracy: 0.888
14 Validation accuracy: 0.8916
15 Validation accuracy: 0.8902
16 Validation accuracy: 0.8916
17 Validation accuracy: 0.8928
18 Validation accuracy: 0.8936
19 Validation accuracy: 0.8924


In [10]:
# 事前にtensorboard.py を持ってくること
from tensorboard import show_graph

In [11]:
# Pass the current default graph to show_graph (imported from the local
# tensorboard.py helper), presumably to render it inline in the notebook.
show_graph(tf.get_default_graph())