**Hello World Example**

_This notebook is partly based on Chapters 9, 11 and 13 of Aurélien Géron: Hands-On Machine Learning with Scikit-Learn and TensorFlow._

# Setup

First, let's make sure this notebook works well in both Python 2 and 3, import a few common modules, ensure Matplotlib plots figures inline, and prepare a function to save the figures:

In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    """Start over with a fresh default graph and re-seed the random generators.

    Both TensorFlow's graph-level seed and NumPy's global generator are set
    to ``seed``. ``tf`` must have been imported by the time this is called.
    """
    np.random.seed(seed)
    # The TensorFlow seed is set after the reset so it applies to the new graph
    tf.reset_default_graph()
    tf.set_random_seed(seed)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
# Font sizes for the axis labels and the tick labels
plt.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

# Where to save the figures
PROJECT_ROOT_DIR = "."
NB_ID = "MNIST_tensorflow"

# create the directory if it does not exist
os.makedirs(os.path.join(PROJECT_ROOT_DIR, "images", NB_ID), exist_ok=True)
        
def save_fig(fig_id, tight_layout=True):
    """Save the current Matplotlib figure as ``images/<NB_ID>/<fig_id>.png``.

    The file is written below ``PROJECT_ROOT_DIR`` as a 300 dpi PNG. When
    ``tight_layout`` is true, ``plt.tight_layout()`` is applied before saving.
    """
    file_name = fig_id + ".png"
    target = os.path.join(PROJECT_ROOT_DIR, "images", NB_ID, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, format='png', dpi=300)

A couple of utility functions to plot grayscale and RGB images:

In [2]:
def plot_image(image):
    """Show ``image`` with the gray colormap, nearest-neighbour interpolation and no axes."""
    plt.imshow(image, interpolation="nearest", cmap="gray")
    plt.axis("off")

def plot_color_image(image):
    """Show ``image`` cast to uint8, with nearest-neighbour interpolation and no axes."""
    as_bytes = image.astype(np.uint8)
    plt.imshow(as_bytes, interpolation="nearest")
    plt.axis("off")

And of course we will need TensorFlow:

In [3]:
import tensorflow as tf

  from ._conv import register_converters as _register_converters


# Using graphs

In [4]:
# Two integer variables and an expression built from them. This only adds
# nodes to the graph; the value of f is computed later, inside a session.
x = tf.Variable(2, name="x")
y = tf.Variable(3, name="y")
f = 3 * x * x + 2 * y * x

In [5]:
with tf.Session() as sess:
    # variables have to be initialized before the graph is run
    sess.run(x.initializer)  # x <- 2
    sess.run(y.initializer)  # y <- 3
    result = sess.run(f)     # evaluate f and keep its value
# the session is closed when the with-block ends

print(result) # Will print 24


24


**Let's manipulate a second graph just because we can**

In [6]:
graph = tf.Graph()
with graph.as_default():
    # Inside this block `graph` is the default graph, so z is created in it.
    # The name has to be passed by keyword: the second positional parameter
    # of tf.Variable is `trainable`, not `name`. Names may not contain
    # spaces, hence the underscores.
    z = tf.Variable(1, name="z_in_graph")
# `graph` stops being the default graph when the with-block ends


In [7]:
# x was created in the default graph, z in the explicitly created one
x_in_default = x.graph is tf.get_default_graph()
z_in_graph = z.graph is graph
print("Is x in default? {} ".format(x_in_default))
print("Is z in graph? {} ".format(z_in_graph))


Is x in default? True 
Is z in graph? True 


**Autodiff with tf.gradients** 

In [8]:
reset_graph()

# Same expression as before, this time built on float variables
x = tf.Variable(2.0, name="x")
y = tf.Variable(3.0, name="y")
f = 3 * x * x + 2 * y * x

# Symbolic partial derivatives [df/dx, df/dy]
gF = tf.gradients(f, [x, y])
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    # fetch the value of f and both gradients in a single run
    result, gradF = sess.run([f, gF])

print(result)
print(gradF)


24.0
[18.0, 4.0]


# MNIST from scratch

Test and Training Data: Let's load MNIST, split it into training images and test images, and hold out the first 5,000 training images as a validation set.


In [9]:
# Load MNIST through Keras as (images, labels) pairs for training and test
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten each 28x28 image into a 784-vector of float32 and divide by 255
X_train = X_train.reshape(-1, 28 * 28).astype(np.float32) / 255.0
X_test = X_test.reshape(-1, 28 * 28).astype(np.float32) / 255.0

# Labels as 32-bit integers
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)

# The first 5000 training samples become the validation set
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

**Constants for network configuration**

In [10]:
# Layer sizes of the fully connected network
nInputs = 28 * 28  # MNIST: one input per pixel of a 28x28 image
nHidden1 = 300     # units in the first hidden layer
nHidden2 = 100     # units in the second hidden layer
nOutputs = 10      # size of the output layer

In [11]:
reset_graph()

# Placeholders that receive a mini-batch through feed_dict at run time.
# X has an open batch dimension and nInputs features per row.
X = tf.placeholder(tf.float32, shape=(None, nInputs), name="X")
# NOTE: `(None)` is plain None rather than a 1-tuple, so the shape of the
# label placeholder is left unspecified.
y = tf.placeholder(tf.int32, shape=None, name="y")

# Dense Layer

Define a dense layer from scratch. Code from Aurélien Géron: Hands-On Machine Learning, Chapter 10.

In [12]:
def neuron_layer(X, n_neurons, name, activation=None):
    """Build a fully connected layer inside the name scope ``name``.

    Args:
        X: input tensor; its second dimension must be statically known
            because it is read to size the weight matrix.
        n_neurons: number of output units of the layer.
        name: name scope under which the layer's ops are created.
        activation: optional callable applied to ``X @ W + b``; when it is
            ``None`` the affine output is returned unchanged.

    Returns:
        The output tensor of the layer.
    """
    with tf.name_scope(name):
        fan_in = int(X.get_shape()[1])
        # Weights start from a truncated normal with stddev 2 / sqrt(fan_in)
        weights = tf.Variable(
            tf.truncated_normal((fan_in, n_neurons), stddev=2 / np.sqrt(fan_in)),
            name="kernel")
        # Biases start at zero
        biases = tf.Variable(tf.zeros([n_neurons]), name="bias")
        Z = tf.matmul(X, weights) + biases
        return Z if activation is None else activation(Z)

In [13]:
with tf.name_scope("MLP"):
    # Two ReLU hidden layers followed by an output layer without activation,
    # so `logits` holds the raw class scores
    hidden1 = neuron_layer(X, nHidden1, name="hidden1",
                           activation=tf.nn.relu)
    hidden2 = neuron_layer(hidden1, nHidden2, name="hidden2",
                           activation=tf.nn.relu)
    logits = neuron_layer(hidden2, nOutputs, name="outputs")

In [14]:
with tf.name_scope("loss"):
    # Per-sample cross-entropy computed from integer labels and raw logits
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=y)
    # The loss is the mean of the per-sample cross-entropies
    loss = tf.reduce_mean(xentropy, name="loss")

In [15]:
learning_rate = 0.01  # step size of the gradient-descent optimizer

with tf.name_scope("train"):
    # Running training_op applies one gradient-descent update that minimizes loss
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

In [16]:
with tf.name_scope("eval"):
    # A sample counts as correct when its label is the top-1 entry of the logits
    correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    # Accuracy is the mean of the correctness flags cast to float32
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [17]:
# Initializer op for all variables, and a saver used later to write a checkpoint
init, saver = tf.global_variables_initializer(), tf.train.Saver()

In [18]:
# Training schedule
n_epochs = 40    # number of passes over the training set
batch_size = 50  # number of samples per mini-batch

In [19]:
def shuffle_batch(X, y, batch_size):
    """Yield shuffled mini-batches that together cover ``X`` and ``y`` once.

    The sample indices are permuted with NumPy's global random generator and
    split into ``len(X) // batch_size`` nearly equal parts, so some batches
    may hold slightly more than ``batch_size`` samples.

    Args:
        X: array of samples, indexable with an integer index array.
        y: array of labels aligned with ``X``.
        batch_size: target number of samples per batch; must be >= 1.

    Yields:
        ``(X_batch, y_batch)`` pairs. Nothing is yielded for empty input, and
        a single batch holding every sample is yielded when there are fewer
        than ``batch_size`` samples.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    n_samples = len(X)
    if n_samples == 0:
        return
    rnd_idx = np.random.permutation(n_samples)
    # np.array_split rejects 0 sections, so always use at least one batch
    n_batches = max(1, n_samples // batch_size)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        yield X[batch_idx], y[batch_idx]

In [20]:
# A UTC timestamp that could be used to keep the log files of each run apart
from datetime import datetime
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
print(now)

# Log directory, relative to the current working directory
rootDir = "tb_logs"
# Per-run alternative: logDir = "{}/log_{}/".format(rootDir, now)
# For this example every run writes to the same log directory
logDir = "{}/log_test/".format(rootDir)

# Write the graph; this assumes the graph built above is the default graph
fw = tf.summary.FileWriter(logDir, graph=tf.get_default_graph())

20181120140826


In [21]:
# Two scalar summaries of the same accuracy tensor under different tags;
# which data each one reports depends on the feed_dict used to evaluate it
batchAcc = tf.summary.scalar(name="Batch_Acc.", tensor=accuracy)
validAcc = tf.summary.scalar(name="Valid_Acc.", tensor=accuracy)


In [22]:
# Write accuracy summaries to TensorBoard once every this many mini-batches.
# Each write evaluates the full validation set, so doing it on every batch
# is expensive.
summary_every = 10

with tf.Session() as sess:
    init.run()
    # shuffle_batch yields len(X_train) // batch_size mini-batches per epoch
    batches_per_epoch = max(1, len(X_train) // batch_size)
    for epoch in range(n_epochs):
        batches = shuffle_batch(X_train, y_train, batch_size)
        for batch_index, (X_batch, y_batch) in enumerate(batches):
            if batch_index % summary_every == 0:
                # Global step counted in mini-batches; it keeps growing from
                # one epoch to the next so the TensorBoard curves do not overlap
                step = epoch * batches_per_epoch + batch_index
                bAcc = batchAcc.eval(feed_dict={X: X_batch, y: y_batch})
                fw.add_summary(bAcc, step)
                vAcc = validAcc.eval(feed_dict={X: X_valid, y: y_valid})
                fw.add_summary(vAcc, step)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Accuracy on the last mini-batch of the epoch and on the validation set
        acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(epoch, "Batch accuracy:", acc_batch, "Val accuracy:", acc_val)

    # Checkpoint the trained variables while the session is still open
    save_path = saver.save(sess, "./my_model_final.ckpt")

# Push any buffered summaries to disk
fw.flush()

0 Batch accuracy: 0.9 Val accuracy: 0.9146
1 Batch accuracy: 0.92 Val accuracy: 0.936
2 Batch accuracy: 0.96 Val accuracy: 0.945
3 Batch accuracy: 0.92 Val accuracy: 0.9512
4 Batch accuracy: 0.98 Val accuracy: 0.9558
5 Batch accuracy: 0.96 Val accuracy: 0.9566
6 Batch accuracy: 1.0 Val accuracy: 0.9612
7 Batch accuracy: 0.94 Val accuracy: 0.9628
8 Batch accuracy: 0.98 Val accuracy: 0.965
9 Batch accuracy: 0.96 Val accuracy: 0.9658
10 Batch accuracy: 0.92 Val accuracy: 0.9686
11 Batch accuracy: 0.98 Val accuracy: 0.9688
12 Batch accuracy: 0.98 Val accuracy: 0.967
13 Batch accuracy: 0.98 Val accuracy: 0.9708
14 Batch accuracy: 1.0 Val accuracy: 0.9712
15 Batch accuracy: 0.94 Val accuracy: 0.973
16 Batch accuracy: 1.0 Val accuracy: 0.9732
17 Batch accuracy: 1.0 Val accuracy: 0.9742
18 Batch accuracy: 1.0 Val accuracy: 0.9744
19 Batch accuracy: 0.98 Val accuracy: 0.975
20 Batch accuracy: 1.0 Val accuracy: 0.9754
21 Batch accuracy: 1.0 Val accuracy: 0.976
22 Batch accuracy: 0.98 Val accurac

# CNN Layer

Define a CNN layer from scratch. 

In [None]:
# X is assumed to be a 4-D tensor laid out as (batch, height, width, channels),
# which is the default NHWC data format of tf.nn.conv2d

def cnn_layer(X, filterSz, nFilters, name, stride=1, activation=None):
    """Build a 2-D convolution layer (without bias) inside the name scope ``name``.

    Args:
        X: 4-D input tensor whose last dimension (the channels) is statically
            known; it is read to size the filter bank.
        filterSz: height and width of the square filters.
        nFilters: number of filters, i.e. number of output channels.
        name: name scope under which the layer's ops are created.
        stride: stride applied along both spatial dimensions.
        activation: optional callable applied to the convolution output; when
            it is ``None`` the convolution output is returned unchanged.

    Returns:
        The output tensor of the layer.
    """
    with tf.name_scope(name):
        n_channels = int(X.get_shape()[3])
        # random initialization of filters
        stddev = 2 / np.sqrt(n_channels + nFilters)
        init = tf.truncated_normal((filterSz, filterSz, n_channels, nFilters),
                                   stddev=stddev)
        filt = tf.Variable(init, name="filt")
        # 4D inputs, filters, output
        Z_conv = tf.nn.conv2d(X, filt,
                              strides=[1, stride, stride, 1], padding="SAME")
        if activation is not None:
            return activation(Z_conv)
        # Without an activation the raw convolution output is the layer output
        return Z_conv