In [1]:
from datetime import datetime
import numpy as np
import tensorflow as tf

In [2]:
# Layer sizes of the network: 28*28 input features, two hidden layers, 10 outputs.
n_inputs = 28 * 28
n_hidden1, n_hidden2 = 300, 100
n_outputs = 10

# Training schedule.
n_epochs, batch_size = 40, 50

# Every run logs into its own sub-directory of `root_logdir`, named after the
# UTC timestamp (YYYYmmddHHMMSS) taken when this cell is executed.
root_logdir = "tf_logs"
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
log_dir = "{}/run-{}/".format(root_logdir, now)

In [3]:
# Input features: one row of `n_inputs` values per example, any batch size.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# Integer labels, one per example. Note the trailing comma: `(None)` is just a
# parenthesised `None` (i.e. "shape unknown"), whereas `(None,)` is the
# 1-tuple that declares a 1-D tensor of unknown length.
y = tf.placeholder(tf.int64, shape=(None,), name="y")

# Flag for batch normalization: feed True while training so the mini-batch
# statistics are used; leave it at the default False for prediction so the
# moving averages accumulated over the training set are used instead.
training = tf.placeholder_with_default(False, shape=(), name="training")

In [4]:
# with tf.name_scope("dnn"):
#     he_init = tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG")
    
#     with tf.name_scope("hidden1"):
#         hidden1 = tf.layers.dense(X, n_hidden1, name="dense", kernel_initializer=he_init)
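#         # batch normalization layer (tf.nn.batch_normalization is more cumbersome to use)
#         # a momentum argument is needed to compute the moving averages (reportedly a value closer to 1 works better when the dataset is large and the mini-batches are small)
#         bn1 = tf.layers.batch_normalization(hidden1, training=training, momentum=0.9)
#         bn1_act = tf.nn.elu(bn1) # the activation function is applied after batch normalization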
    
#     with tf.name_scope("hidden2"):
#         hidden2 = tf.layers.dense(bn1_act, n_hidden2, name="dense", kernel_initializer=he_init)
#         bn2 = tf.layers.batch_normalization(hidden2, training=training, momentum=0.9)
#         bn2_act = tf.nn.elu(bn2)
    
#     with tf.name_scope("logits"):
#         logits_before_bn = tf.layers.dense(bn2_act, n_outputs, name="outputs", kernel_initializer=he_init)
#         logits = tf.layers.batch_normalization(logits_before_bn, training=training, momentum=0.9)

In [5]:
# The batch-normalization layer is called several times with the same
# arguments, so pre-bind those arguments once instead of repeating them.
from functools import partial

# `partial` returns a new callable that already carries the default arguments;
# callers only have to pass the layer input.
my_batch_normalization = partial(
    tf.layers.batch_normalization,
    momentum=0.9,
    training=training,
)

In [6]:
with tf.name_scope("dnn"):
    # One identically configured variance-scaling initializer per dense layer.
    he_init1, he_init2, he_init3 = (
        tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG")
        for _ in range(3)
    )

    # First hidden layer: dense -> batch normalization -> ELU.
    with tf.name_scope("hidden1"):
        hidden1 = tf.layers.dense(
            inputs=X,
            units=n_hidden1,
            kernel_initializer=he_init1,
            name="dense1",
        )
        bn1 = my_batch_normalization(hidden1)
        # The activation is applied after batch normalization.
        bn1_act = tf.nn.elu(bn1)

    # Second hidden layer: same structure, fed by the first layer's activations.
    with tf.name_scope("hidden2"):
        hidden2 = tf.layers.dense(
            inputs=bn1_act,
            units=n_hidden2,
            kernel_initializer=he_init2,
            name="dense2",
        )
        bn2 = my_batch_normalization(hidden2)
        bn2_act = tf.nn.elu(bn2)

    # Output layer: dense followed by batch normalization, no activation.
    with tf.name_scope("logits"):
        logits_before_bn = tf.layers.dense(
            inputs=bn2_act,
            units=n_outputs,
            kernel_initializer=he_init3,
            name="outputs",
        )
        logits = my_batch_normalization(logits_before_bn)

In [7]:
with tf.name_scope("loss"):
    # Per-example cross entropy between the integer labels and the logits ...
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=y,
    )
    # ... averaged over the batch to give the scalar training objective.
    loss = tf.reduce_mean(xentropy, name="loss")

In [8]:
learning_rate = 0.01
# Gradients are clipped element-wise to the range [-threshold, threshold].
threshold = 1.0

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    # Unclipped alternative: training_op = optimizer.minimize(loss)
    #
    # Gradient clipping: keep every gradient component within a fixed threshold
    # to mitigate the exploding-gradient problem.
    grads_and_vars = optimizer.compute_gradients(loss)
    capped_gvs = [
        (tf.clip_by_value(grad, -threshold, threshold), var)
        for grad, var in grads_and_vars
        # compute_gradients yields (None, var) for variables the loss does not
        # depend on; tf.clip_by_value cannot handle None, so skip those pairs.
        if grad is not None
    ]
    training_op = optimizer.apply_gradients(capped_gvs)

In [9]:
with tf.name_scope("eval"):
    # Per-example boolean: is the true label among the top-1 logits?
    correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    # Accuracy is the mean of those booleans once cast to floats.
    correct_as_float = tf.cast(correct, dtype=tf.float32)
    accuracy = tf.reduce_mean(correct_as_float)

In [10]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()


def _scalar_summaries(scope_name):
    """Create the ('loss', 'accuracy') scalar summary ops inside `scope_name`.

    Returns the pair (loss summary op, accuracy summary op), created in that order.
    """
    with tf.name_scope(scope_name):
        loss_op = tf.summary.scalar('loss', loss)
        acc_op = tf.summary.scalar('accuracy', accuracy)
    return loss_op, acc_op


# Separate summary ops for the training and validation curves; both pairs read
# the same `loss` / `accuracy` tensors and differ only in their name scope.
loss_summary_train, acc_summary_train = _scalar_summaries("train_eval")
loss_summary_val, acc_summary_val = _scalar_summaries("val_eval")

In [11]:
import os

from tensorflow.examples.tutorials.mnist import input_data

# "~" is only a shell convention: passed as-is, the data would be stored in a
# directory literally named "~" under the current working directory. Expand it
# to the user's home directory explicitly.
mnist = input_data.read_data_sets(os.path.expanduser("~/tf_data/"))

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ~/tf_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ~/tf_data/train-labels-idx1-ubyte.gz
Extracting ~/tf_data/t10k-images-idx3-ubyte.gz
Extracting ~/tf_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [13]:
# Batch normalization automatically creates the ops that update its moving
# averages and registers them in the UPDATE_OPS collection. They are not run
# implicitly, so fetch them here and run them together with the training op.
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

# Loop invariants. The fetch lists are ordered (loss, accuracy) so that they
# match the names the results are unpacked into below.
n_batches = mnist.train.num_examples // batch_size
train_list = [loss_summary_train, acc_summary_train]
val_list = [loss_summary_val, acc_summary_val]

with tf.Session() as sess:
    init.run()

    with tf.summary.FileWriter(log_dir, tf.get_default_graph()) as file_writer:
        # Number of parameter updates performed so far. Used as the summaries'
        # global step so that every logged point gets its own x position
        # instead of all batches of an epoch collapsing onto the epoch index.
        step = 0
        for epoch in range(n_epochs):
            for iteration in range(n_batches):
                X_batch, y_batch = mnist.train.next_batch(batch_size)
                sess.run([training_op, extra_update_ops],
                         feed_dict={X: X_batch, y: y_batch, training: True})
                step += 1

                # Evaluate on the same batch with `training` left at its
                # default (False), i.e. with the moving averages.
                loss_train, acc_train = sess.run(
                    train_list,
                    feed_dict={X: X_batch, y: y_batch})
                file_writer.add_summary(loss_train, step)
                file_writer.add_summary(acc_train, step)

            # Validation metrics once per epoch, logged at the step reached at
            # the end of that epoch so both curves share the same x axis.
            loss_val, acc_val = sess.run(
                val_list,
                feed_dict={X: mnist.validation.images, y: mnist.validation.labels})
            file_writer.add_summary(loss_val, step)
            file_writer.add_summary(acc_val, step)

        save_path = saver.save(sess, "./logs/dnn_bn/my_model_final.ckpt")