In [1]:
import numpy as np
import tensorflow as tf

def load_mnist():
    (X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
    X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
    X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
    y_train = y_train.astype(np.int32)
    y_test = y_test.astype(np.int32)
    return X_train, y_train, X_test, y_test

In [2]:
sess = tf.InteractiveSession()

In [3]:
X_train, y_train, X_test, y_test = load_mnist()

In [4]:
n_inputs = X_train.shape[1]
# n_inputs here is 28*28: an image flattened

n_hidden1 = 300
n_hidden2 = 100

n_outputs = len(np.unique(y_train))

# None = any size
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")


In [None]:
n_outputs

In [5]:
def neuron_layer(X, n_neurons, name, activation=None):
    with tf.name_scope(name):
        n_inputs = int(X.get_shape()[1])

        init = tf.truncated_normal((n_inputs, n_neurons))
        # or: with stddev = 2 / np.sqrt(n_inputs + n_neurons)
        
        W = tf.Variable(init, name="kernel")
        variable_summaries(W)
        
        b = tf.Variable(tf.zeros([n_neurons]), name='bias')
        variable_summaries(b)
        
        Z = tf.matmul(X, W) + b
        tf.summary.histogram("pre-activation", Z)

        if activation is not None:
            A = activation(Z)
            tf.summary.histogram("activation", A)
            return A
        else:
            return Z

In [9]:
with tf.name_scope("dnn"):
    hidden1 = neuron_layer(X, n_hidden1, name="hidden1", activation=tf.nn.relu)
    hidden2 = neuron_layer(hidden1, n_hidden2, name="hidden2", activation=tf.nn.relu)
    logits = neuron_layer(hidden2, n_outputs, name="outputs")

In [None]:
tf.nn.sparse_softmax_cross_entropy_with_logits??

In [10]:
# logits stands for output of the network BEFORE going through 
# softmax activation function
with tf.name_scope("loss"):
    xe = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
#     tf.summary.scalar("cross-entropy", xe)
    loss = tf.reduce_mean(xe, name="loss")

In [11]:
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

In [12]:
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    tf.summary.scalar("accuracy", accuracy)

In [13]:
merged = tf.summary.merge_all()

train_writer = tf.summary.FileWriter('./train', sess.graph)
test_writer = tf.summary.FileWriter('./test')

tf.global_variables_initializer().run()

In [14]:
n_epochs = 30
batch_size = 50

num_examples = X_train.shape[0]

saver = tf.train.Saver()

for epoch in range(n_epochs):
    for it in range(num_examples // batch_size):
        start = it*batch_size
        end = start + batch_size
        X_batch, y_batch = X_train[start : end, :], y_train[start : end]

        if it % 10 == 0:
            summary, acc = sess.run([merged, accuracy], feed_dict=feed_dict(it, batch_size, False))
            test_writer.add_summary(summary, it)
            if it % 1000 == 0:
                print("Iteration {}, val acc: {}".format(it, acc))
        else:
            summary, _ = sess.run([merged, training_op], feed_dict=feed_dict(it, batch_size, True))
            train_writer.add_summary(summary, it)

    acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
    acc_val = accuracy.eval(feed_dict={X: X_test, y: y_test})

    print("\n>>> Epoch {}. train acc: {:2g}, val acc: {:2g}".format(epoch+1, acc_train, acc_val))

save_path = saver.save(sess, "./my_model.ckpt")
    
train_writer.close()
test_writer.close()
       

Iteration 0, val acc: 0.0828000009059906
Iteration 1000, val acc: 0.7360000014305115

>>> Epoch 1. train acc: 0.9, val acc: 0.7249
Iteration 0, val acc: 0.7249000072479248
Iteration 1000, val acc: 0.7013000249862671

>>> Epoch 2. train acc: 0.88, val acc: 0.7438
Iteration 0, val acc: 0.7437999844551086
Iteration 1000, val acc: 0.7264999747276306

>>> Epoch 3. train acc: 0.94, val acc: 0.7675
Iteration 0, val acc: 0.7674999833106995
Iteration 1000, val acc: 0.7652999758720398

>>> Epoch 4. train acc: 0.94, val acc: 0.7833
Iteration 0, val acc: 0.78329998254776
Iteration 1000, val acc: 0.8021000027656555

>>> Epoch 5. train acc: 0.94, val acc: 0.7977
Iteration 0, val acc: 0.7976999878883362
Iteration 1000, val acc: 0.8154000043869019

>>> Epoch 6. train acc: 0.94, val acc: 0.8066
Iteration 0, val acc: 0.8065999746322632
Iteration 1000, val acc: 0.8234999775886536

>>> Epoch 7. train acc: 0.94, val acc: 0.8145
Iteration 0, val acc: 0.8144999742507935
Iteration 1000, val acc: 0.82999998331

KeyboardInterrupt: 

In [7]:
def feed_dict(it, batch_size, train):
    if train:
        start = it*batch_size
        end = start + batch_size
        X_batch, y_batch = X_train[start : end, :], y_train[start : end]
    else:
        X_batch, y_batch =  X_test, y_test
        
    return {X: X_batch, y: y_batch}

In [8]:
def variable_summaries(var):
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)