In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data

In [2]:
from lib import helper

In [3]:
# Load MNIST from the local data/ directory with one-hot encoded labels.
mnist = input_data.read_data_sets("data/", one_hot=True)

# Keep the whole test split at hand; it is fed as a single batch during evaluation.
test_data, test_label = mnist.test.images, mnist.test.labels

Extracting data/train-images-idx3-ubyte.gz
Extracting data/train-labels-idx1-ubyte.gz
Extracting data/t10k-images-idx3-ubyte.gz
Extracting data/t10k-labels-idx1-ubyte.gz


In [4]:
# --- Model shape ---------------------------------------------------------
num_inputs = 28 * 28        # flattened 28x28 image -> 784 features
num_outputs = 10            # width of the output layer / one-hot label
num_units = 256             # width of every hidden dense layer
num_layers = 8              # number of hidden dense layers

# --- Optimisation --------------------------------------------------------
batch_size = 64
num_epocs = 25000           # counts mini-batch steps, not passes over the data
learning_rate = 0.001

# --- Regularisation ------------------------------------------------------
use_dropout = False         # master switch for both dropout rates below
in_dropout_rate = 0.2       # rate used on the network input
all_dropout_rate = 0.4      # rate used after each hidden layer

In [5]:
# Project-wide dtype aliases so the numeric precision can be changed in one place.
myint, myfloat = tf.int32, tf.float32

In [6]:
# Graph inputs; the leading dimension is left open so any batch size can be fed.
x = tf.placeholder(myfloat, [None, num_inputs], name='inputs')
y = tf.placeholder(myfloat, [None, num_outputs], name='outputs')
# Boolean switch passed to the dropout layers as their `training` argument.
is_training = tf.placeholder(tf.bool, name='is_training')

In [7]:
# network
# Build the stack on a separate handle instead of rebinding `x`.
# Rebinding `x` to the dropout output would make later `feed_dict={x: ...}`
# calls feed the *dropout output* tensor directly, silently bypassing the
# input dropout, and would stack another dropout on every re-run of this cell.
layer = x
if use_dropout:
    layer = tf.layers.dropout(layer, rate=in_dropout_rate, training=is_training)

# `num_layers` hidden ReLU layers, each optionally followed by dropout.
# Starting from `layer = x` also keeps the cell valid when num_layers == 0.
for i in range(num_layers):
    layer = tf.layers.dense(layer,
                            num_units,
                            tf.nn.relu,
                            kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
                            name='l_{}'.format(i))
    if use_dropout:
        layer = tf.layers.dropout(layer,
                                  rate=all_dropout_rate,
                                  training=is_training,
                                  name='l_dropout_{}'.format(i))

# Softmax output layer: one probability per class.
out = tf.layers.dense(layer,
                      num_outputs,
                      tf.nn.softmax,
                      kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
                      name='out_layer')

In [8]:
# Cross-entropy on the softmax output, averaged over the batch.
# The additive constant (10e-8, i.e. 1e-7) keeps log() away from zero.
per_example_ce = -tf.reduce_sum(y * tf.log(out + 10e-8), 1)
loss_ce = tf.reduce_mean(per_example_ce)

# Squared error summed over every element of the batch; it is reported
# alongside the cross-entropy but is not the loss the gradients are taken of.
squared_diff = tf.square(y - out)
loss_se = tf.reduce_sum(squared_diff)

# Gradients are computed explicitly (rather than via minimize()) so their
# norms can be inspected below.
optimizer = tf.train.AdamOptimizer(learning_rate)
params = tf.trainable_variables()
gradients = tf.gradients(loss_ce, params)
train = optimizer.apply_gradients(zip(gradients, params))

# One norm per trainable variable, then their sum as a single scalar.
grad_norm = list(map(tf.norm, gradients))
grad_norm_sum = tf.reduce_sum(grad_norm)

In [9]:
# Class indices: position of the largest entry along axis 1.
y_label = tf.argmax(y, 1)
out_label = tf.argmax(out, 1)

# Fraction of examples whose predicted index equals the true index.
correct = tf.equal(y_label, out_label)
hit_mask = tf.cast(correct, myfloat)
accuracy = tf.reduce_mean(hit_mask)

In [10]:
# Scalars logged on every training step, grouped under the 'train' name scope.
with tf.name_scope('train'):
    smr_loss_ce = tf.summary.scalar('loss_cross_entropy', loss_ce)
    smr_loss_se = tf.summary.scalar('loss_squared_error', loss_se)
    smr_acc = tf.summary.scalar('accuracy', accuracy)
    smr_grad = tf.summary.scalar('gradient', grad_norm_sum)
    train_summaries = [smr_loss_ce, smr_loss_se, smr_acc, smr_grad]
    merged_summary = tf.summary.merge(train_summaries)

# The same accuracy tensor, logged under 'test' when it is fed the test split.
with tf.name_scope('test'):
    test_smr_acc = tf.summary.scalar('accuracy', accuracy)

# Initialiser op for all global variables that exist at this point in the graph.
init = tf.global_variables_initializer()

In [11]:
from datetime import datetime

# Timestamp for the optional per-run suffix (the suffix is currently disabled).
now = datetime.now()

# Encode the run configuration in the log path so runs with different
# settings land in separate directories.
if use_dropout:
    use_dropout_str = 'on'
else:
    use_dropout_str = 'off'
logdir_base = 'logs/dropout_relu/dropout={}/units={}/layers={}/'.format(
    use_dropout_str, num_units, num_layers)
logdir = logdir_base #+ now.strftime("%Y%m%d-%H%M%S") + "/"

In [None]:
# Train for `num_epocs` mini-batch steps, writing summaries to `logdir`
# (defined in the log-directory cell above), then report final test accuracy.
with tf.Session() as sess:
    writer = tf.summary.FileWriter(logdir, sess.graph)
    # try/finally so the event file is flushed and closed even when training
    # is interrupted or a step raises.
    try:
        sess.run(init)

        # The evaluation feed never changes, so build it once.
        test_feed = {
            x: test_data,
            y: test_label,
            is_training: False
        }

        for i in range(num_epocs):
            train_data, train_label = mnist.train.next_batch(batch_size)
            # `train` is run for its side effect only; its return value is discarded.
            _, loss, train_acc, smr = sess.run([train, loss_se, accuracy, merged_summary], feed_dict={
                x: train_data,
                y: train_label,
                is_training: True
            })
            writer.add_summary(smr, i)

            # Every 10th step: evaluate on the full test split and log it.
            if i % 10 == 0:
                test_acc_1, test_smr = sess.run([accuracy, test_smr_acc], feed_dict=test_feed)
                # Note: the printed `loss` is the summed squared error, not the cross-entropy.
                print('epoc: ', i, ', loss: ', loss, ', train_acc: ', train_acc, 'test_acc, ', test_acc_1)
                writer.add_summary(test_smr, i)

        # Final evaluation after the last training step.
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print('test accuracy: ', test_acc)
    finally:
        writer.close()

epoc:  0 , loss:  57.301 , train_acc:  0.140625 test_acc,  0.2429
epoc:  10 , loss:  35.0076 , train_acc:  0.640625 test_acc,  0.7112
epoc:  20 , loss:  22.094 , train_acc:  0.78125 test_acc,  0.7642
epoc:  30 , loss:  16.5289 , train_acc:  0.84375 test_acc,  0.8372
epoc:  40 , loss:  14.1208 , train_acc:  0.859375 test_acc,  0.8776
epoc:  50 , loss:  19.9221 , train_acc:  0.765625 test_acc,  0.8656
epoc:  60 , loss:  19.1711 , train_acc:  0.765625 test_acc,  0.8502
epoc:  70 , loss:  13.2825 , train_acc:  0.859375 test_acc,  0.8786
epoc:  80 , loss:  13.4303 , train_acc:  0.859375 test_acc,  0.9021
epoc:  90 , loss:  7.93969 , train_acc:  0.90625 test_acc,  0.8916
epoc:  100 , loss:  10.545 , train_acc:  0.890625 test_acc,  0.9134
epoc:  110 , loss:  13.987 , train_acc:  0.859375 test_acc,  0.9022
epoc:  120 , loss:  16.9605 , train_acc:  0.796875 test_acc,  0.8914
epoc:  130 , loss:  11.2151 , train_acc:  0.859375 test_acc,  0.9207
epoc:  140 , loss:  4.90197 , train_acc:  0.96875 te

epoc:  1210 , loss:  2.79805 , train_acc:  0.96875 test_acc,  0.9605
epoc:  1220 , loss:  0.125124 , train_acc:  1.0 test_acc,  0.9566
epoc:  1230 , loss:  4.11851 , train_acc:  0.96875 test_acc,  0.9617
epoc:  1240 , loss:  0.308063 , train_acc:  1.0 test_acc,  0.9544
epoc:  1250 , loss:  5.84804 , train_acc:  0.953125 test_acc,  0.9581
epoc:  1260 , loss:  1.7598 , train_acc:  0.96875 test_acc,  0.9579
epoc:  1270 , loss:  4.6851 , train_acc:  0.9375 test_acc,  0.9589
epoc:  1280 , loss:  7.69558 , train_acc:  0.921875 test_acc,  0.9674
epoc:  1290 , loss:  3.14108 , train_acc:  0.953125 test_acc,  0.9568
epoc:  1300 , loss:  3.51788 , train_acc:  0.96875 test_acc,  0.9578
epoc:  1310 , loss:  0.076602 , train_acc:  1.0 test_acc,  0.9527
epoc:  1320 , loss:  4.90059 , train_acc:  0.953125 test_acc,  0.9646
epoc:  1330 , loss:  5.71061 , train_acc:  0.9375 test_acc,  0.9623
epoc:  1340 , loss:  3.56269 , train_acc:  0.96875 test_acc,  0.9664
epoc:  1350 , loss:  1.12828 , train_acc:  

epoc:  2410 , loss:  1.25235 , train_acc:  1.0 test_acc,  0.9717
epoc:  2420 , loss:  5.23977 , train_acc:  0.953125 test_acc,  0.9676
epoc:  2430 , loss:  1.33367 , train_acc:  0.984375 test_acc,  0.9692
epoc:  2440 , loss:  2.68036 , train_acc:  0.953125 test_acc,  0.9693
epoc:  2450 , loss:  4.36945 , train_acc:  0.953125 test_acc,  0.9666
epoc:  2460 , loss:  0.0869225 , train_acc:  1.0 test_acc,  0.968
epoc:  2470 , loss:  0.585863 , train_acc:  1.0 test_acc,  0.9678
epoc:  2480 , loss:  0.127511 , train_acc:  1.0 test_acc,  0.9716
epoc:  2490 , loss:  1.7369 , train_acc:  0.984375 test_acc,  0.9621
epoc:  2500 , loss:  2.0061 , train_acc:  0.984375 test_acc,  0.9685
epoc:  2510 , loss:  4.45275 , train_acc:  0.953125 test_acc,  0.9671
epoc:  2520 , loss:  3.21071 , train_acc:  0.96875 test_acc,  0.9662
epoc:  2530 , loss:  1.63278 , train_acc:  0.984375 test_acc,  0.9633
epoc:  2540 , loss:  2.88188 , train_acc:  0.953125 test_acc,  0.9733
epoc:  2550 , loss:  0.658753 , train_ac

epoc:  3610 , loss:  5.73556 , train_acc:  0.9375 test_acc,  0.9691
epoc:  3620 , loss:  0.0508676 , train_acc:  1.0 test_acc,  0.9728
epoc:  3630 , loss:  1.79253 , train_acc:  0.984375 test_acc,  0.9729
epoc:  3640 , loss:  2.43787 , train_acc:  0.96875 test_acc,  0.9738
epoc:  3650 , loss:  3.47154 , train_acc:  0.96875 test_acc,  0.9672
epoc:  3660 , loss:  0.358033 , train_acc:  1.0 test_acc,  0.9691
epoc:  3670 , loss:  0.965075 , train_acc:  0.984375 test_acc,  0.9735
epoc:  3680 , loss:  3.29098 , train_acc:  0.96875 test_acc,  0.9757
epoc:  3690 , loss:  2.04299 , train_acc:  0.984375 test_acc,  0.9736
epoc:  3700 , loss:  2.07145 , train_acc:  0.984375 test_acc,  0.9732
epoc:  3710 , loss:  0.484789 , train_acc:  1.0 test_acc,  0.9745
epoc:  3720 , loss:  1.86067 , train_acc:  0.96875 test_acc,  0.9751
epoc:  3730 , loss:  1.85494 , train_acc:  0.984375 test_acc,  0.9726
epoc:  3740 , loss:  3.34017 , train_acc:  0.96875 test_acc,  0.9746
epoc:  3750 , loss:  2.92511 , train_

epoc:  4820 , loss:  0.826438 , train_acc:  0.984375 test_acc,  0.973
epoc:  4830 , loss:  1.31989 , train_acc:  0.984375 test_acc,  0.9734
epoc:  4840 , loss:  0.691148 , train_acc:  1.0 test_acc,  0.954
epoc:  4850 , loss:  4.34714 , train_acc:  0.953125 test_acc,  0.9729
epoc:  4860 , loss:  2.93273 , train_acc:  0.96875 test_acc,  0.9769
epoc:  4870 , loss:  0.606819 , train_acc:  1.0 test_acc,  0.973
epoc:  4880 , loss:  8.13397 , train_acc:  0.921875 test_acc,  0.9719
epoc:  4890 , loss:  0.0380528 , train_acc:  1.0 test_acc,  0.9744
epoc:  4900 , loss:  2.20644 , train_acc:  0.96875 test_acc,  0.9756
epoc:  4910 , loss:  2.27738 , train_acc:  0.96875 test_acc,  0.9774
epoc:  4920 , loss:  1.61932 , train_acc:  0.984375 test_acc,  0.9718
epoc:  4930 , loss:  0.207759 , train_acc:  1.0 test_acc,  0.9766
epoc:  4940 , loss:  2.31079 , train_acc:  0.984375 test_acc,  0.9717
epoc:  4950 , loss:  1.85285 , train_acc:  0.984375 test_acc,  0.974
epoc:  4960 , loss:  1.63216 , train_acc:

epoc:  6030 , loss:  0.630161 , train_acc:  1.0 test_acc,  0.9676
epoc:  6040 , loss:  2.11503 , train_acc:  0.984375 test_acc,  0.9719
epoc:  6050 , loss:  1.14684 , train_acc:  0.984375 test_acc,  0.9671
epoc:  6060 , loss:  2.43698 , train_acc:  0.984375 test_acc,  0.968
epoc:  6070 , loss:  1.4138 , train_acc:  0.984375 test_acc,  0.9726
epoc:  6080 , loss:  0.803088 , train_acc:  1.0 test_acc,  0.9709
epoc:  6090 , loss:  0.0296407 , train_acc:  1.0 test_acc,  0.9719
epoc:  6100 , loss:  2.85582 , train_acc:  0.96875 test_acc,  0.9726
epoc:  6110 , loss:  0.111077 , train_acc:  1.0 test_acc,  0.9727
epoc:  6120 , loss:  0.00312624 , train_acc:  1.0 test_acc,  0.9747
epoc:  6130 , loss:  2.0016 , train_acc:  0.984375 test_acc,  0.9744
epoc:  6140 , loss:  4.04213 , train_acc:  0.96875 test_acc,  0.9765
epoc:  6150 , loss:  3.79496 , train_acc:  0.96875 test_acc,  0.969
epoc:  6160 , loss:  1.73978 , train_acc:  0.984375 test_acc,  0.9711
epoc:  6170 , loss:  5.39931 , train_acc:  0

epoc:  7240 , loss:  1.90803 , train_acc:  0.96875 test_acc,  0.9746
epoc:  7250 , loss:  0.0306423 , train_acc:  1.0 test_acc,  0.9738
epoc:  7260 , loss:  1.40407 , train_acc:  0.984375 test_acc,  0.9762
epoc:  7270 , loss:  0.828597 , train_acc:  0.984375 test_acc,  0.9755
epoc:  7280 , loss:  0.0961223 , train_acc:  1.0 test_acc,  0.9778
epoc:  7290 , loss:  1.47959 , train_acc:  0.984375 test_acc,  0.9711
epoc:  7300 , loss:  3.13173 , train_acc:  0.96875 test_acc,  0.9741
epoc:  7310 , loss:  0.159085 , train_acc:  1.0 test_acc,  0.9741
epoc:  7320 , loss:  0.758898 , train_acc:  1.0 test_acc,  0.9755
epoc:  7330 , loss:  1.98415 , train_acc:  0.984375 test_acc,  0.974
epoc:  7340 , loss:  1.75006 , train_acc:  0.984375 test_acc,  0.9682
epoc:  7350 , loss:  1.537 , train_acc:  0.984375 test_acc,  0.9751
epoc:  7360 , loss:  0.768034 , train_acc:  0.984375 test_acc,  0.9744
epoc:  7370 , loss:  2.1824 , train_acc:  0.984375 test_acc,  0.9705
epoc:  7380 , loss:  0.463772 , train_

epoc:  8450 , loss:  0.00478878 , train_acc:  1.0 test_acc,  0.9783
epoc:  8460 , loss:  0.402744 , train_acc:  1.0 test_acc,  0.9781
epoc:  8470 , loss:  0.000233168 , train_acc:  1.0 test_acc,  0.9754
epoc:  8480 , loss:  6.86847 , train_acc:  0.921875 test_acc,  0.966
epoc:  8490 , loss:  1.0883 , train_acc:  1.0 test_acc,  0.9627
epoc:  8500 , loss:  1.46594 , train_acc:  0.984375 test_acc,  0.9723
epoc:  8510 , loss:  0.0486859 , train_acc:  1.0 test_acc,  0.9751
epoc:  8520 , loss:  0.103983 , train_acc:  1.0 test_acc,  0.9781
epoc:  8530 , loss:  1.8924 , train_acc:  0.984375 test_acc,  0.9748
epoc:  8540 , loss:  2.96486 , train_acc:  0.96875 test_acc,  0.9745
epoc:  8550 , loss:  2.59514 , train_acc:  0.96875 test_acc,  0.9681
epoc:  8560 , loss:  2.51187 , train_acc:  0.96875 test_acc,  0.9728
epoc:  8570 , loss:  1.43471 , train_acc:  1.0 test_acc,  0.9753
epoc:  8580 , loss:  3.6999 , train_acc:  0.96875 test_acc,  0.9703
epoc:  8590 , loss:  0.863982 , train_acc:  0.984375