# Lecture 12: Introduction to TensorFlow II

In [1]:
# Print the wall-clock time at which this notebook was executed.
import datetime
now = datetime.datetime.now()
print("Version: " + now.strftime("%Y-%m-%d %H:%M:%S"))

Version: 2019-01-09 16:55:52


In [2]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# To make this notebook's output stable across runs
def reset_graph(seed=42):
    """Start from an empty default graph with re-seeded random generators.

    Clears TensorFlow's default graph, then applies ``seed`` both to NumPy's
    global generator and to TensorFlow's graph-level seed so that the
    notebook's output is stable across runs.

    ``tf`` is looked up when the function is called; TensorFlow is imported
    in a later cell, before the first call.
    """
    tf.reset_default_graph()
    np.random.seed(seed)
    tf.set_random_seed(seed)
    
# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
# Font sizes for axis labels and tick labels in all figures.
plt.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

In [3]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import fetch_california_housing
reset_graph()

# Load the California housing data; m and n are the row and column counts
# of the feature matrix.
housing = fetch_california_housing()
m, n = housing.data.shape
# Prepend a column of ones so the bias can be learnt as an ordinary weight.
housing_data_plus_bias = np.hstack([np.ones((m, 1)), housing.data])
# Targets as a single-column 2-D array.
housing_data_target = housing.target.reshape(-1, 1)

  from ._conv import register_converters as _register_converters


In [4]:
from sklearn.preprocessing import StandardScaler
# Standardise the features, then prepend the bias column of ones.
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.concatenate(
    [np.ones((m, 1)), scaled_housing_data], axis=1)

# Saving and restoring models

Models and parameters can be saved easily.

It is good practice not only to save the final model but also to checkpoint (i.e. save intermediate models) as training is performed.

In [5]:
reset_graph()

# Hyperparameters for full-batch gradient descent.
n_epochs = 1000
learning_rate = 0.01

# The whole scaled dataset and the targets are embedded in the graph as constants.
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing_data_target, dtype=tf.float32, name="y")
# Parameter column vector with one entry per column of X (n features + bias),
# initialised uniformly between -1 and 1.
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
# Mean squared error over all samples; this is the quantity being minimised.
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()
# Saver used below to write checkpoints and to restore the variables.
saver = tf.train.Saver()

In [6]:
# Run n_epochs full-batch training steps; every 100th epoch report the MSE
# and overwrite the intermediate checkpoint.
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if not epoch % 100:
            print("Epoch", epoch, "MSE =", sess.run(mse))
            save_path = saver.save(sess, "./my_model.ckpt")
        sess.run(training_op)

    # Keep the trained parameters and write the final checkpoint.
    best_theta = sess.run(theta)
    save_path = saver.save(sess, "./my_model_final.ckpt")
    

Epoch 0 MSE = 9.161543
Epoch 100 MSE = 0.7145006
Epoch 200 MSE = 0.56670463
Epoch 300 MSE = 0.5555716
Epoch 400 MSE = 0.5488117
Epoch 500 MSE = 0.5436362
Epoch 600 MSE = 0.53962916
Epoch 700 MSE = 0.53650916
Epoch 800 MSE = 0.5340678
Epoch 900 MSE = 0.53214717


In [7]:
best_theta

array([[ 2.0685525 ],
       [ 0.8874027 ],
       [ 0.14401658],
       [-0.34770882],
       [ 0.36178368],
       [ 0.00393811],
       [-0.04269556],
       [-0.6614528 ],
       [-0.6375277 ]], dtype=float32)

Models can then be restored easily.

In [8]:
# In a fresh session, load the variable values from the final checkpoint
# instead of running the initializer.
with tf.Session() as sess:
    saver.restore(sess, "./my_model_final.ckpt")
    best_theta_restored = sess.run(theta)

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt


In [9]:
best_theta_restored

array([[ 2.0685525 ],
       [ 0.8874027 ],
       [ 0.14401658],
       [-0.34770882],
       [ 0.36178368],
       [ 0.00393811],
       [-0.04269556],
       [-0.6614528 ],
       [-0.6375277 ]], dtype=float32)

In [10]:
np.allclose(best_theta, best_theta_restored)

True

Computational graph definition is saved in file with `.meta` extension.

Can also load graphs.

In [11]:
reset_graph()  # start with an empty graph.

# Rebuild the graph structure from the .meta file, then look theta up by its
# tensor name in the rebuilt graph.
saver = tf.train.import_meta_graph("./my_model_final.ckpt.meta")
theta = tf.get_default_graph().get_tensor_by_name("theta:0")

with tf.Session() as sess:
    # restores the graph's state
    saver.restore(sess, "./my_model_final.ckpt")
    best_theta_restored = sess.run(theta)

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt


In [12]:
np.allclose(best_theta, best_theta_restored)

True

# Visualising computational graphs with TensorBoard

TensorBoard provides functionality to visualise computational graphs and training statistics.

## Logging

In [13]:
reset_graph()

# NOTE: this rebinds the name `datetime` from the module (imported in the
# first cell) to the `datetime` class; later cells rely on the class binding.
from datetime import datetime

# One log directory per run, named after the current UTC timestamp, so that
# separate runs do not get mixed together under `root_logdir`.
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

In [14]:
# NOTE: n_epochs is reassigned to 10 in a later cell before the training loop runs.
n_epochs = 1000
learning_rate = 0.01

# Placeholders instead of constants, so a different mini-batch can be fed at
# each training step; the leading dimension is left unspecified.
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

Create summary and file writer:

In [15]:
# Summary node that evaluates `mse` and records it under the tag 'MSE'.
mse_summary = tf.summary.scalar('MSE', mse)
# Writer that logs into `logdir`; it is also handed the default graph.
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [16]:
n_epochs = 10
batch_size = 100
# Mini-batches per epoch, rounded up.  `m / batch_size` is true division
# thanks to the `from __future__ import division` at the top of the notebook.
n_batches = int(np.ceil(m / batch_size))

In [17]:
def fetch_batch(epoch, batch_index, batch_size):
    """Return a random mini-batch ``(X_batch, y_batch)`` of ``batch_size`` rows.

    NumPy's global generator is re-seeded from ``epoch`` and ``batch_index``
    (using the notebook-level ``n_batches``), so for fixed ``n_batches`` and
    ``m`` a given (epoch, batch) pair always selects the same rows.  Row
    indices come from ``np.random.randint``, so a row may appear more than
    once in a batch.
    """
    batch_seed = epoch * n_batches + batch_index
    np.random.seed(batch_seed)
    row_ids = np.random.randint(m, size=batch_size)
    return scaled_housing_data_plus_bias[row_ids], housing_data_target[row_ids]

In [18]:
# Mini-batch gradient descent; on every 10th batch the MSE summary is
# evaluated on the current batch and logged at the global step number.
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            if not batch_index % 10:
                summary_str = sess.run(mse_summary,
                                       feed_dict={X: X_batch, y: y_batch})
                step = epoch * n_batches + batch_index
                file_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

    best_theta = sess.run(theta)

In [19]:
file_writer.close()

In [20]:
best_theta

array([[ 2.0703337 ],
       [ 0.8637145 ],
       [ 0.12255151],
       [-0.31211874],
       [ 0.38510373],
       [ 0.00434168],
       [-0.01232954],
       [-0.83376896],
       [-0.8030471 ]], dtype=float32)

## TensorBoard

Now we can inspect the logs in TensorBoard.

At the command line run:

`tensorboard --logdir tf_logs`

Open in a browser on port 6006, i.e. localhost:6006...

# More complex models

## Name scopes

Large complex models can easily become cluttered with many nodes, making them difficult to visualise directly.

To avoid this problem *name scopes* can be created to group nodes.

In [21]:
reset_graph()

# `datetime` here is the class bound by the earlier `from datetime import datetime`.
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

# NOTE: n_epochs is reassigned to 10 in the training cell below before it is used.
n_epochs = 1000
learning_rate = 0.01

# Same placeholder-based linear model as before, rebuilt in the fresh graph.
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")

Create `error` and `mse` inside `loss` name scope:

In [22]:
# Ops created inside this block get the "loss/" prefix in their names
# (e.g. "loss/sub" and "loss/mse", as printed further down).
with tf.name_scope("loss") as scope:
    error = y_pred - y
    mse = tf.reduce_mean(tf.square(error), name="mse")

Set up optimizer:

In [23]:
# Gradient descent step that minimises the MSE defined in the "loss" scope.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

# Scalar summary of the MSE and a writer logging into this run's `logdir`.
mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

Run:

In [24]:
n_epochs = 10
batch_size = 100
n_batches = int(np.ceil(m / batch_size))

# Same mini-batch loop as before: log the batch MSE on every 10th batch and
# take one gradient step per batch.
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            if not batch_index % 10:
                summary_str = sess.run(mse_summary,
                                       feed_dict={X: X_batch, y: y_batch})
                step = epoch * n_batches + batch_index
                file_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

    best_theta = sess.run(theta)

In [25]:
# Flush any pending events and close the writer before inspecting the result.
file_writer.flush()
file_writer.close()
print("Best theta:")
print(best_theta)

Best theta:
[[ 2.0703337 ]
 [ 0.8637145 ]
 [ 0.12255151]
 [-0.31211874]
 [ 0.38510373]
 [ 0.00434168]
 [-0.01232954]
 [-0.83376896]
 [-0.8030471 ]]


View in TensorBoard...

In [26]:
print(error.op.name)

loss/sub


In [27]:
print(mse.op.name)

loss/mse


## Modularity

Many graph components are often repeated.

For example, let's create a graph with two rectified linear units (ReLUs).

Could just repeat code but this is not efficient coding and is prone to errors.

In [28]:
reset_graph()

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")

# Two ReLUs written out by hand: each needs its own weights and bias ...
w1 = tf.Variable(tf.random_normal((n_features, 1)), name="weights1")
w2 = tf.Variable(tf.random_normal((n_features, 1)), name="weights2")
b1 = tf.Variable(0.0, name="bias1")
b2 = tf.Variable(0.0, name="bias2")

# ... its own linear combination of the inputs ...
z1 = tf.add(tf.matmul(X, w1), b1, name="z1")
z2 = tf.add(tf.matmul(X, w2), b2, name="z2")

# ... and its own rectification, max(z, 0).
relu1 = tf.maximum(z1, 0., name="relu1")
relu2 = tf.maximum(z2, 0., name="relu2")

output = tf.add(relu1, relu2, name="output")

Better approach is to create a function defining a ReLU.

In [29]:
reset_graph()

def relu(X):
    """Add one ReLU unit fed by ``X`` to the default graph and return its output.

    Every call creates its own weights (random normal, one per column of
    ``X``) and its own scalar bias (initialised to 0.0).
    """
    n_inputs = int(X.get_shape()[1])
    weights = tf.Variable(tf.random_normal((n_inputs, 1)), name="weights")
    bias = tf.Variable(0.0, name="bias")
    linear = tf.add(tf.matmul(X, weights), bias, name="z")
    return tf.maximum(linear, 0., name="relu")

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
# Five independent ReLUs on the same input, summed into a single output.
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name="output")

In [30]:
# Write the graph definition for TensorBoard, then close the writer so the
# event file is flushed to disk and the file handle is released (the other
# ReLU logging cells in this notebook close their writers in the same way).
file_writer = tf.summary.FileWriter("tf_logs/relu1", tf.get_default_graph())
file_writer.close()

View in TensorBoard...

Still somewhat confusing.  

Use name scopes to make more clear.

In [31]:
reset_graph()

def relu(X):
    """Add one ReLU unit fed by ``X``, with all of its ops grouped in a "relu" name scope.

    Every call creates its own weights (random normal, one per column of
    ``X``) and its own scalar bias (initialised to 0.0).
    """
    with tf.name_scope("relu"):
        n_inputs = int(X.get_shape()[1])
        weights = tf.Variable(tf.random_normal((n_inputs, 1)), name="weights")
        bias = tf.Variable(0.0, name="bias")
        linear = tf.add(tf.matmul(X, weights), bias, name="z")
        return tf.maximum(linear, 0., name="max")

In [32]:
n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
# Five name-scoped ReLUs on the same input, summed into a single output.
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name="output")

# Log the graph for TensorBoard and close the writer straight away.
file_writer = tf.summary.FileWriter("tf_logs/relu2", tf.get_default_graph())
file_writer.close()

## Sharing variables

Often want to share variables between nodes.  

For example, say we want to consider ReLUs with non-zero thresholds.

One way is to simply pass parameters around as variables.

In [33]:
reset_graph()

def relu(X, threshold):
    """Add one ReLU unit fed by ``X`` whose output is clipped below at ``threshold``.

    ``threshold`` is supplied by the caller, so the same tensor can be passed
    to several units.  Weights and bias are created afresh on every call.
    """
    with tf.name_scope("relu"):
        n_inputs = int(X.get_shape()[1])
        weights = tf.Variable(tf.random_normal((n_inputs, 1)), name="weights")
        bias = tf.Variable(0.0, name="bias")
        linear = tf.add(tf.matmul(X, weights), bias, name="z")
        return tf.maximum(linear, threshold, name="max")

# A single threshold variable handed explicitly to each of the five ReLUs.
threshold = tf.Variable(0.0, name="threshold")
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = [relu(X, threshold) for i in range(5)]
output = tf.add_n(relus, name="output")

While this works perfectly fine, TensorFlow offers an alternative approach that is cleaner and more modular.

Use TensorFlow `tf.get_variable` function to create (if it doesn't yet exist) or reuse variables.

In [34]:
reset_graph()

# Create the scalar variable "threshold" inside the "relu" variable scope,
# initialised to 0.0.
with tf.variable_scope("relu"):
    threshold = tf.get_variable("threshold", shape=(),
                                initializer=tf.constant_initializer(0.0))

This will actually raise an exception if the variable has already been created by an earlier call to `tf.get_variable`.

Need to explicitly specify that can reuse variables.

Can be performed by setting variable scope's `reuse` attribute to True, either by:

In [35]:
# Re-open the scope with reuse=True to fetch the existing "threshold"
# variable instead of creating a new one.
with tf.variable_scope("relu", reuse=True):
    threshold = tf.get_variable("threshold")

or by setting explicitly:

In [36]:
# Same effect as above, but reuse is switched on through the scope object
# after the scope has been entered.
with tf.variable_scope("relu") as scope:
    scope.reuse_variables()
    threshold = tf.get_variable("threshold")

Using the shared threshold variable:

In [37]:
reset_graph()

def relu(X):
    """Add one ReLU unit fed by ``X`` that clips at the shared "relu/threshold" variable.

    The threshold is fetched with ``tf.get_variable`` under ``reuse=True``,
    so it must already have been created in the "relu" variable scope before
    this function is called.  Weights and bias are created afresh on every call.
    """
    with tf.variable_scope("relu", reuse=True):
        shared_threshold = tf.get_variable("threshold")
        n_inputs = int(X.get_shape()[1])
        weights = tf.Variable(tf.random_normal((n_inputs, 1)), name="weights")
        bias = tf.Variable(0.0, name="bias")
        linear = tf.add(tf.matmul(X, weights), bias, name="z")
        return tf.maximum(linear, shared_threshold, name="max")

X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
# Create the shared threshold first; relu() then looks it up by name.
with tf.variable_scope("relu"):
    threshold = tf.get_variable("threshold", shape=(),
                                initializer=tf.constant_initializer(0.0))
relus = [relu(X) for relu_index in range(5)]
output = tf.add_n(relus, name="output")

In [38]:
# Log the graph for TensorBoard and close the writer straight away.
file_writer = tf.summary.FileWriter("tf_logs/relu3", tf.get_default_graph())
file_writer.close()

View in TensorBoard...

Could also put the `threshold` inside the first ReLU:

In [39]:
reset_graph()

def relu(X):
    """Add one ReLU unit fed by ``X``, creating or reusing the "threshold" variable.

    ``tf.get_variable`` is called in whatever variable scope the caller has
    opened, so the caller decides, through that scope's ``reuse`` flag,
    whether the threshold is created or fetched.  Weights and bias are
    created afresh on every call.
    """
    clip_at = tf.get_variable("threshold", shape=(),
                              initializer=tf.constant_initializer(0.0))
    n_inputs = int(X.get_shape()[1])
    weights = tf.Variable(tf.random_normal((n_inputs, 1)), name="weights")
    bias = tf.Variable(0.0, name="bias")
    linear = tf.add(tf.matmul(X, weights), bias, name="z")
    return tf.maximum(linear, clip_at, name="max")

X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = []
for relu_index in range(5):
    # The first unit creates the threshold; every later unit reuses it.
    with tf.variable_scope("relu", reuse=(relu_index >= 1)) as scope:
        relus.append(relu(X))
output = tf.add_n(relus, name="output")

In [40]:
# Log the graph for TensorBoard and close the writer straight away.
file_writer = tf.summary.FileWriter("tf_logs/relu4", tf.get_default_graph())
file_writer.close()

View in TensorBoard...

# Neural networks in TensorFlow

## Using TF.Learn (high-level API)

TF.Learn provides a high-level TensorFlow API in Python that is compatible with Scikit-Learn.

In [41]:
from tensorflow.examples.tutorials.mnist import input_data
# Read the MNIST data sets from /tmp/data/ (the output below shows the four
# archive files being extracted from that directory).
mnist = input_data.read_data_sets("/tmp/data/")

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [42]:
# Training and test images, with the labels cast to integers for the classifier.
X_train = mnist.train.images
X_test = mnist.test.images
y_train = mnist.train.labels.astype("int")
y_test = mnist.test.labels.astype("int")

In [43]:
import tensorflow as tf

# Fix TensorFlow's random seed for this estimator.
config = tf.contrib.learn.RunConfig(tf_random_seed=42)

# Feature columns are inferred from the training array itself.
feature_cols = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)
# Classifier for the 10 digit classes, with hidden layer sizes 300 and 100.
dnn_clf = tf.contrib.learn.DNNClassifier(hidden_units=[300,100], n_classes=10,
                                         feature_columns=feature_cols, config=config)
# Wrap the estimator so it can be driven through fit/predict calls.
dnn_clf = tf.contrib.learn.SKCompat(dnn_clf)
# 40000 training steps on batches of 50 examples (the log output below is long).
dnn_clf.fit(X_train, y_train, batch_size=50, steps=40000)

INFO:tensorflow:Using config: {'_evaluation_master': '', '_keep_checkpoint_every_n_hours': 10000, '_num_ps_replicas': 0, '_log_step_count_steps': 100, '_tf_random_seed': 42, '_task_type': None, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1.0
}
, '_save_checkpoints_steps': None, '_environment': 'local', '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x10abc1f98>, '_master': '', '_save_checkpoints_secs': 600, '_model_dir': '/tmp/tmpe69c9rh9', '_task_id': 0, '_session_config': None, '_save_summary_steps': 100, '_keep_checkpoint_max': 5, '_is_chief': True, '_num_worker_replicas': 0}
Instructions for updating:
Please switch to tf.train.get_global_step
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmpe69c9rh9/model.ckpt.
INFO:tensorflow:loss = 2.4005778, step = 1
INFO:tensorflow:global_step/sec: 264.728
INFO:tensorflow:loss = 0.31267586, step = 101 (0.379 sec)
INFO:tensorflow:global_step/sec: 2

INFO:tensorflow:global_step/sec: 295.925
INFO:tensorflow:loss = 0.008367192, step = 7101 (0.338 sec)
INFO:tensorflow:global_step/sec: 290.367
INFO:tensorflow:loss = 0.04883387, step = 7201 (0.344 sec)
INFO:tensorflow:global_step/sec: 298.618
INFO:tensorflow:loss = 0.009504036, step = 7301 (0.335 sec)
INFO:tensorflow:global_step/sec: 291.725
INFO:tensorflow:loss = 0.016978893, step = 7401 (0.343 sec)
INFO:tensorflow:global_step/sec: 286.606
INFO:tensorflow:loss = 0.010757222, step = 7501 (0.349 sec)
INFO:tensorflow:global_step/sec: 291.264
INFO:tensorflow:loss = 0.010587335, step = 7601 (0.343 sec)
INFO:tensorflow:global_step/sec: 293.598
INFO:tensorflow:loss = 0.005931136, step = 7701 (0.341 sec)
INFO:tensorflow:global_step/sec: 296.404
INFO:tensorflow:loss = 0.005197429, step = 7801 (0.337 sec)
INFO:tensorflow:global_step/sec: 293.652
INFO:tensorflow:loss = 0.006449755, step = 7901 (0.341 sec)
INFO:tensorflow:global_step/sec: 288.311
INFO:tensorflow:loss = 0.0020574287, step = 8001 (0

INFO:tensorflow:loss = 0.0023113445, step = 15101 (0.434 sec)
INFO:tensorflow:global_step/sec: 260.202
INFO:tensorflow:loss = 0.001015892, step = 15201 (0.384 sec)
INFO:tensorflow:global_step/sec: 264.948
INFO:tensorflow:loss = 0.0022939122, step = 15301 (0.377 sec)
INFO:tensorflow:global_step/sec: 186.205
INFO:tensorflow:loss = 0.0028404044, step = 15401 (0.537 sec)
INFO:tensorflow:global_step/sec: 227.866
INFO:tensorflow:loss = 0.005412121, step = 15501 (0.439 sec)
INFO:tensorflow:global_step/sec: 230.416
INFO:tensorflow:loss = 0.0035895878, step = 15601 (0.434 sec)
INFO:tensorflow:global_step/sec: 205.284
INFO:tensorflow:loss = 0.0058891536, step = 15701 (0.487 sec)
INFO:tensorflow:global_step/sec: 215.393
INFO:tensorflow:loss = 0.0008186471, step = 15801 (0.465 sec)
INFO:tensorflow:global_step/sec: 241.404
INFO:tensorflow:loss = 0.00085781404, step = 15901 (0.414 sec)
INFO:tensorflow:global_step/sec: 234.486
INFO:tensorflow:loss = 0.0065113665, step = 16001 (0.426 sec)
INFO:tensorf

INFO:tensorflow:loss = 0.0023677696, step = 23101 (0.336 sec)
INFO:tensorflow:global_step/sec: 288.996
INFO:tensorflow:loss = 0.0018610213, step = 23201 (0.346 sec)
INFO:tensorflow:global_step/sec: 291.823
INFO:tensorflow:loss = 0.0018711936, step = 23301 (0.343 sec)
INFO:tensorflow:global_step/sec: 293.561
INFO:tensorflow:loss = 0.0005051533, step = 23401 (0.340 sec)
INFO:tensorflow:global_step/sec: 290.037
INFO:tensorflow:loss = 0.00076665386, step = 23501 (0.345 sec)
INFO:tensorflow:global_step/sec: 292.423
INFO:tensorflow:loss = 0.000556651, step = 23601 (0.342 sec)
INFO:tensorflow:global_step/sec: 294.917
INFO:tensorflow:loss = 0.0001052184, step = 23701 (0.339 sec)
INFO:tensorflow:global_step/sec: 293.751
INFO:tensorflow:loss = 0.00091980194, step = 23801 (0.340 sec)
INFO:tensorflow:global_step/sec: 294.418
INFO:tensorflow:loss = 0.0017980053, step = 23901 (0.340 sec)
INFO:tensorflow:global_step/sec: 295.052
INFO:tensorflow:loss = 0.0014662343, step = 24001 (0.339 sec)
INFO:tenso

INFO:tensorflow:global_step/sec: 276.7
INFO:tensorflow:loss = 0.0011234849, step = 31101 (0.361 sec)
INFO:tensorflow:global_step/sec: 290.911
INFO:tensorflow:loss = 0.00031862652, step = 31201 (0.344 sec)
INFO:tensorflow:global_step/sec: 293.03
INFO:tensorflow:loss = 0.0004291339, step = 31301 (0.341 sec)
INFO:tensorflow:global_step/sec: 291.187
INFO:tensorflow:loss = 0.0013877174, step = 31401 (0.343 sec)
INFO:tensorflow:global_step/sec: 297.709
INFO:tensorflow:loss = 0.00020001482, step = 31501 (0.336 sec)
INFO:tensorflow:global_step/sec: 299.657
INFO:tensorflow:loss = 0.00032674294, step = 31601 (0.334 sec)
INFO:tensorflow:global_step/sec: 275.276
INFO:tensorflow:loss = 0.0006365755, step = 31701 (0.364 sec)
INFO:tensorflow:global_step/sec: 240.812
INFO:tensorflow:loss = 0.00016916303, step = 31801 (0.415 sec)
INFO:tensorflow:global_step/sec: 290.958
INFO:tensorflow:loss = 0.0006920603, step = 31901 (0.344 sec)
INFO:tensorflow:global_step/sec: 295.674
INFO:tensorflow:loss = 0.000129

INFO:tensorflow:loss = 0.0001628006, step = 39001 (0.420 sec)
INFO:tensorflow:global_step/sec: 245.218
INFO:tensorflow:loss = 0.0007085502, step = 39101 (0.408 sec)
INFO:tensorflow:global_step/sec: 250.064
INFO:tensorflow:loss = 0.00033850258, step = 39201 (0.400 sec)
INFO:tensorflow:global_step/sec: 225.221
INFO:tensorflow:loss = 0.00038915616, step = 39301 (0.444 sec)
INFO:tensorflow:global_step/sec: 247.8
INFO:tensorflow:loss = 0.0003968734, step = 39401 (0.404 sec)
INFO:tensorflow:global_step/sec: 261.599
INFO:tensorflow:loss = 0.00015025295, step = 39501 (0.382 sec)
INFO:tensorflow:global_step/sec: 245.64
INFO:tensorflow:loss = 0.00062390114, step = 39601 (0.408 sec)
INFO:tensorflow:global_step/sec: 230.621
INFO:tensorflow:loss = 0.00014465627, step = 39701 (0.433 sec)
INFO:tensorflow:global_step/sec: 216.937
INFO:tensorflow:loss = 0.0011388173, step = 39801 (0.461 sec)
INFO:tensorflow:global_step/sec: 232.34
INFO:tensorflow:loss = 0.0007998731, step = 39901 (0.430 sec)
INFO:tenso

SKCompat()

In [44]:
from sklearn.metrics import accuracy_score

# predict() returns a mapping; the predicted labels are under the 'classes' key.
y_pred = dnn_clf.predict(X_test)
accuracy_score(y_test, y_pred['classes'])

INFO:tensorflow:Restoring parameters from /tmp/tmpe69c9rh9/model.ckpt-40000


0.9835

Better accuracy than we achieved with Scikit-Learn!


## Using plain TensorFlow

The standard API, as we've focused on previously, provides much more control in constructing and training the network architecture.

### Construction of the computational graph

In [45]:
import tensorflow as tf

# Layer sizes: 28*28 inputs, two hidden layers and 10 outputs.
n_inputs = 28*28  # MNIST
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

In [46]:
reset_graph()

# Input batch: one row of n_inputs values per example, batch size left open.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# Integer class labels, one per example.  Note the trailing comma: `(None)`
# is just `None` (shape completely unspecified), whereas `(None,)` declares
# a 1-D tensor of unknown length, which is what the loss and the accuracy
# nodes are fed in this notebook.
y = tf.placeholder(tf.int64, shape=(None,), name="y")

Each layer in the network is similar so let's define a general layer that we can reuse.

In [47]:
def neuron_layer(X, n_neurons, name, activation=None):
    """Add a fully connected layer of ``n_neurons`` units on top of ``X``.

    All ops are created inside a name scope called ``name``.  The kernel is
    initialised from a truncated normal with standard deviation
    ``2 / sqrt(n_inputs)`` and the bias from zeros.

    Returns ``activation(X W + b)`` when ``activation`` is given, otherwise
    the linear output ``X W + b``.
    """
    with tf.name_scope(name):
        fan_in = int(X.get_shape()[1])
        init_stddev = 2 / np.sqrt(fan_in)  # More on this in next lecture
        kernel_init = tf.truncated_normal((fan_in, n_neurons), stddev=init_stddev)
        W = tf.Variable(kernel_init, name="kernel")
        b = tf.Variable(tf.zeros([n_neurons]), name="bias")
        Z = tf.matmul(X, W) + b
        return Z if activation is None else activation(Z)

In practice, TensorFlow contains many built-in functions so it's generally not necessary to define layers like this (see, e.g., `tf.layers.dense`). 

Now let's construct 3 layers.

In [48]:
# Two ReLU hidden layers followed by a linear output layer; the output layer
# has no activation, so it produces raw logits.
with tf.name_scope("dnn"):
    hidden1 = neuron_layer(X, n_hidden1, name="hidden1",
                           activation=tf.nn.relu)
    hidden2 = neuron_layer(hidden1, n_hidden2, name="hidden2",
                           activation=tf.nn.relu)
    logits = neuron_layer(hidden2, n_outputs, name="outputs")

Train using cross-entropy, where the softmax is applied when defining the cross-entropy rather than in the network construction.

In [49]:
# Per-example cross-entropy computed from the integer labels `y` and the raw
# logits, then averaged over the batch to give the scalar training loss.
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                              logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

Define gradient descent optimizer.

In [50]:
learning_rate = 0.01

# Plain gradient descent step that minimises the cross-entropy loss.
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

Define nodes to evaluate accuracy.

The function [`in_top_k`](https://www.tensorflow.org/api_docs/python/tf/nn/in_top_k) checks whether the targets (`y`) are in the top k predictions (`logits`).

In [51]:
# `correct` flags, per example, whether the label is the top-1 prediction;
# averaging the flags cast to floats gives the fraction classified correctly.
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

Finally, define the initializer and a saver.

In [52]:
# Node that initialises all variables, and a saver for checkpointing the
# trained network.
init = tf.global_variables_initializer()
saver = tf.train.Saver()

### Execution (training)

In [53]:
# Number of passes over the training set and the mini-batch size.
n_epochs = 10
batch_size = 50

In [54]:
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Training accuracy is measured on the last mini-batch of the epoch
        # only; validation accuracy uses the full validation set.
        acc_train = sess.run(accuracy, feed_dict={X: X_batch, y: y_batch})
        acc_val = sess.run(accuracy, feed_dict={X: mnist.validation.images,
                                                y: mnist.validation.labels})
        print(epoch, "Train accuracy:", acc_train, "Val accuracy:", acc_val)

    # This path was also used for the linear regression checkpoint earlier
    # in the notebook; it now holds the network's variables.
    save_path = saver.save(sess, "./my_model_final.ckpt")

0 Train accuracy: 0.9 Val accuracy: 0.9146
1 Train accuracy: 0.94 Val accuracy: 0.9348
2 Train accuracy: 0.92 Val accuracy: 0.9466
3 Train accuracy: 0.96 Val accuracy: 0.9508
4 Train accuracy: 0.92 Val accuracy: 0.9586
5 Train accuracy: 0.94 Val accuracy: 0.9584
6 Train accuracy: 0.98 Val accuracy: 0.9608
7 Train accuracy: 0.96 Val accuracy: 0.9636
8 Train accuracy: 0.92 Val accuracy: 0.9638
9 Train accuracy: 0.96 Val accuracy: 0.965


### Using the trained network to make predictions

In [55]:
# Restore the trained variables and classify the first 20 test images by
# taking the index of the largest logit in each row.
with tf.Session() as sess:
    saver.restore(sess, "./my_model_final.ckpt")
    X_new_scaled = mnist.test.images[:20]
    Z = sess.run(logits, feed_dict={X: X_new_scaled})
    y_pred = Z.argmax(axis=1)

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt


Recall that we only defined the network to compute the logits.  If class probabilities are required then the softmax function needs to be applied.  But that is not needed if we just want to make a single prediction (i.e. just pick the class with the largest logit value).

In [56]:
# Compare the predictions with the true labels of the same 20 test images.
print("Predicted classes:", y_pred)
print("Actual classes:   ", mnist.test.labels[:20])

Predicted classes: [7 2 1 0 4 1 4 9 6 9 0 6 9 0 1 5 9 7 3 4]
Actual classes:    [7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]
