In [1]:
import os

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from datetime import datetime

# Expose only the CUDA device with index 1 to this process.
# NOTE(review): this runs after `import tensorflow`; it is expected to take
# effect as long as no GPU has been initialised yet -- confirm on your setup.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

  from ._conv import register_converters as _register_converters


## Construction Phase

In [2]:
# see p. 268 Géron

def neuron_layer(X, n_neurons, name, activation=None):
    """Build one fully connected layer inside its own name scope.

    Args:
        X: input tensor; its second dimension is read as the number of
            input features, so it must be statically known.
        n_neurons: number of units (output features) of the layer.
        name: name scope under which the layer's ops are created.
        activation: optional callable applied to the affine output.

    Returns:
        ``activation(X @ W + b)`` when an activation is given, otherwise the
        raw affine output ``X @ W + b``.
    """
    with tf.name_scope(name):
        fan_in = int(X.get_shape()[1])
        # A truncated Gaussian avoids large initial weights.
        weight_std = 2 / np.sqrt(fan_in + n_neurons)
        W = tf.Variable(
            tf.truncated_normal((fan_in, n_neurons), stddev=weight_std),
            name="kernel",
        )
        b = tf.Variable(tf.zeros([n_neurons]), name="bias")
        Z = tf.matmul(X, W) + b
        # Keep the activation op inside the layer's name scope.
        return Z if activation is None else activation(Z)

In [3]:
# Layer sizes of the network, from input to output.
n_inputs = 28 * 28                 # one input per pixel of a 28x28 image
n_hidden, n_hidden2 = 300, 100     # widths of the two hidden layers
n_outputs = 10                     # one logit per class

In [4]:
# Step size used by the gradient-descent optimizer below.
learning_rate = 0.01

# Inputs: a batch of flattened images and their integer class labels.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# `(None)` is just `None` (unknown rank); `(None,)` declares a 1-D batch of labels.
y = tf.placeholder(tf.int64, shape=(None,), name="y")

# Two ReLU hidden layers followed by a linear output layer producing logits.
# tf.layers.dense is used here in place of the hand-written neuron_layer() helper.
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden, name="hidden1",
                              activation=tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name="hidden2",
                              activation=tf.nn.relu)
    logits = tf.layers.dense(hidden2, n_outputs, name="outputs")

# Mean cross-entropy between the integer labels and the softmax of the logits.
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

# Accuracy: fraction of examples whose label has the highest logit.
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [5]:
# `datetime` is already imported in the notebook's first cell, so it is not
# re-imported here.

# Timestamped log directory so that each run gets its own folder.
now = datetime.utcnow().strftime("%Y%m%d-%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

# Two scalar summaries over the same accuracy tensor; the tag tells apart
# which data the value was computed on when it is written.
acc_train_summary = tf.summary.scalar("acc_train", accuracy)
acc_val_summary = tf.summary.scalar("acc_val", accuracy)

# The writer is created here, before the init and saver ops are added below.
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

init = tf.global_variables_initializer()
saver = tf.train.Saver()

## Execution Phase

In [6]:
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST from ./tmp/data/. The returned object is used later through its
# .train, .validation and .test splits (each with .images / .labels).
mnist = input_data.read_data_sets("./tmp/data/")

Extracting ./tmp/data/train-images-idx3-ubyte.gz
Extracting ./tmp/data/train-labels-idx1-ubyte.gz
Extracting ./tmp/data/t10k-images-idx3-ubyte.gz
Extracting ./tmp/data/t10k-labels-idx1-ubyte.gz


In [7]:
n_epochs = 100
batch_size = 50
n_batches = mnist.train.num_examples // batch_size
ckpt_path = "./tmp/ckpts/my_model_final.ckpt"

# Make sure the checkpoint directory exists before saver.save() writes into it.
os.makedirs(os.path.dirname(ckpt_path), exist_ok=True)

# The validation feed is the same every epoch, so build it once.
val_feed = {X: mnist.validation.images, y: mnist.validation.labels}

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(n_batches):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

        # Fetch the accuracy and its summary in one run instead of two
        # separate forward passes over the same feed.
        # Note: the training accuracy is measured on the last mini-batch of
        # the epoch only, so it is a noisy estimate.
        acc_train, acc_train_summary_str = sess.run(
            [accuracy, acc_train_summary],
            feed_dict={X: X_batch, y: y_batch})
        acc_val, acc_val_summary_str = sess.run(
            [accuracy, acc_val_summary], feed_dict=val_feed)

        file_writer.add_summary(acc_train_summary_str, epoch)
        file_writer.add_summary(acc_val_summary_str, epoch)

        print(epoch, "Train Accuracy: ", acc_train,
                     "Validation Accuracy: ", acc_val)

    save_path = saver.save(sess, ckpt_path)

# Push any buffered events to disk; flush() (rather than close()) keeps the
# writer usable if this cell is run again.
file_writer.flush()

0 Train Accuracy:  0.9 Validation Accuracy:  0.902
1 Train Accuracy:  0.98 Validation Accuracy:  0.924
2 Train Accuracy:  0.84 Validation Accuracy:  0.9342
3 Train Accuracy:  0.92 Validation Accuracy:  0.9414
4 Train Accuracy:  0.92 Validation Accuracy:  0.9478
5 Train Accuracy:  0.86 Validation Accuracy:  0.953
6 Train Accuracy:  0.98 Validation Accuracy:  0.9554
7 Train Accuracy:  0.98 Validation Accuracy:  0.9584
8 Train Accuracy:  0.94 Validation Accuracy:  0.9594
9 Train Accuracy:  0.92 Validation Accuracy:  0.9636
10 Train Accuracy:  0.98 Validation Accuracy:  0.9646
11 Train Accuracy:  0.96 Validation Accuracy:  0.9658
12 Train Accuracy:  0.98 Validation Accuracy:  0.9688
13 Train Accuracy:  0.98 Validation Accuracy:  0.9674
14 Train Accuracy:  0.92 Validation Accuracy:  0.9704
15 Train Accuracy:  0.98 Validation Accuracy:  0.9716
16 Train Accuracy:  0.98 Validation Accuracy:  0.9714
17 Train Accuracy:  1.0 Validation Accuracy:  0.972
18 Train Accuracy:  0.98 Validation Accuracy

## Using the Neural Network

In [None]:
# Restore the trained weights from the checkpoint and classify the test images.
with tf.Session() as sess:
    saver.restore(sess, "./tmp/ckpts/my_model_final.ckpt")
    X_test = mnist.test.images
    # Logits for every test image, one row per image.
    Z = sess.run(logits, feed_dict={X: X_test})
    # The predicted class is the index of the largest logit in each row.
    y_pred = Z.argmax(axis=1)