In [3]:
# The following code creates the graph
import tensorflow as tf
x = tf.Variable(3, name="x")
y = tf.Variable(4, name="y")
f = x*x*y + y + 2
with tf.Session() as sess:

    x.initializer.run()
    y.initializer.run()
    result = f.eval()
print(result)
# Calling x.initializer.run() is equivalent to calling tf.get_default_session().run(x.initializer) , and similarly f.eval() is
# equivalent to calling tf.get_default_session().run(f) . This makes the code easier to read.
# Moreover, the session is automatically closed at the end of the block.

42


In [11]:
# Instead of manually running the initializer for every single variable, you can use the
# global_variables_initializer() function. Note that it does not actually perform the initialization
# immediately, but rather creates a node in the graph that will initialize all variables when it is run:
init = tf.global_variables_initializer()
# prepare an init node
with tf.Session() as sess:
    init.run() # actually initialize all the variables
result = f.eval()
# Inside Jupyter or within a Python shell you may prefer to create an InteractiveSession . The only
# difference from a regular Session is that when an InteractiveSession is created it automatically setsitself as the default session, so you don’t need a with block (but you do need to close the session
# manually when you are done with it):
sess = tf.InteractiveSession()
init.run()
result = f.eval()
print(result)
sess.close()

ValueError: Cannot evaluate tensor using `eval()`: No default session is registered. Use `with sess.as_default()` or pass an explicit session to `eval(session=sess)`

In [21]:
# Linear Regression with TensorFlow
# In the examples so far, the tensors just contained a single scalar value, but you can of course perform
# computations on arrays of any shape. For example, the following code manipulates 2D arrays to perform
# Linear Regression on the California housing dataset (introduced in Chapter 2). It starts by fetching the
# dataset; then it adds an extra bias input feature (x 0 = 1) to all training instances (it does so using NumPy so
# it runs immediately); then it creates two TensorFlow constant nodes, X and y , to hold this data and the
# targets, 4 and it uses some of the matrix operations provided by TensorFlow to define theta .

import numpy as np
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()
m, n = housing.data.shape
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

with tf.Session() as sess:
    theta_value = theta.eval()
    print(theta_value)

[[-3.7465141e+01]
 [ 4.3573415e-01]
 [ 9.3382923e-03]
 [-1.0662201e-01]
 [ 6.4410698e-01]
 [-4.2513184e-06]
 [-3.7732250e-03]
 [-4.2664889e-01]
 [-4.4051403e-01]]


In [22]:

# Implementing Gradient Descent
# When using Gradient Descent, remember that it is important to first normalize the input feature vectors, or else training may be
# much slower. You can do this using TensorFlow, NumPy, Scikit-Learn’s StandardScaler , or any other solution you prefer. The
# following code assumes that this normalization has already been done.
n_epochs = 1000
learning_rate = 0.01
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
 

Epoch 0 MSE = 2215843.2
Epoch 100 MSE = nan
Epoch 200 MSE = nan
Epoch 300 MSE = nan
Epoch 400 MSE = nan
Epoch 500 MSE = nan
Epoch 600 MSE = nan
Epoch 700 MSE = nan
Epoch 800 MSE = nan
Epoch 900 MSE = nan


In [None]:
   
#     , TensorFlow’s autodiff feature comes to the rescue: it can automatically and efficiently
# compute the gradients for you. Simply replace the gradients = ... line in the Gradient Descent code in
# the previous section with the following line, and the code will continue to work just fine:


gradients = tf.gradients(mse, [theta])[0]

# So TensorFlow computes the gradients for you. But it gets even easier: it also provides a number of
# optimizers out of the box, including a Gradient Descent optimizer. You can simply replace the preceding
# gradients = ... and training_op = ... lines with the following code, and once again everything
# will just work fine:

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)


# If you want to use a different type of optimizer, you just need to change one line. For example, you can use
# a momentum optimizer (which often converges much faster than Gradient Descent; see Chapter 11) by
# defining the optimizer like this:

optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
momentum=0.9)

In [None]:
# Feeding Data to the Training Algorithm


# Let’s try to modify the previous code to implement Mini-batch Gradient Descent. For this, we need a way
# to replace X and y at every iteration with the next mini-batch. The simplest way to do this is to use
# placeholder nodes. These nodes are special because they don’t actually perform any computation, they
# just output the data you tell them to output at runtime. They are typically used to pass the training data to
# TensorFlow during training. If you don’t specify a value at runtime for a placeholder, you get an
# exception.
# To create a placeholder node, you must call the placeholder() function and specify the output tensor’s
# data type. Optionally, you can also specify its shape, if you want to enforce it. If you specify None for a
# dimension, it means “any size.” For example, the following code creates a placeholder node A , and also a
# node B = A + 5 . When we evaluate B , we pass a feed_dict to the eval() method that specifies the
# value of A . Note that A must have rank 2 (i.e., it must be two-dimensional) and there must be three columns
# (or else an exception is raised), but it can have any number of rows.

A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5
with tf.Session() as sess:
    B_val_1 = B.eval(feed_dict={A: [[1, 2, 3]]})
    B_val_2 = B.eval(feed_dict={A: [[4, 5, 6], [7, 8, 9]]})
print(B_val_1)
# [[ 6. 7. 8.]]
print(B_val_2)
# [[ 9. 10. 11.]
# [ 12. 13. 14.]]
# To implement Mini-batch Gradient Descent, we only need to tweak the existing code slightly. First change
# the definition of X and y in the construction phase to make them placeholder nodes:
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
# Then define the batch size and compute the total number of batches:
batch_size = 100
n_batches = int(np.ceil(m / batch_size))
# Finally, in the execution phase, fetch the mini-batches one by one, then provide the value of X and y via
# the feed_dict parameter when evaluating a node that depends on either of them.
def fetch_batch(epoch, batch_index, batch_size):
    [...] # load the data from disk
    return X_batch, y_batch
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
    best_theta = theta.eval()

In [None]:
# TensorFlow makes saving and restoring a model very easy. Just create a Saver node at the end of the
# construction phase (after all variable nodes are created); then, in the execution phase, just call its save()
# method whenever you want to save the model, passing it the session and path of the checkpoint file:
[...]
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
[...]
init = tf.global_variables_initializer()
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0: # checkpoint every 100 epochs
            save_path = saver.save(sess, "/tmp/my_model.ckpt")
        sess.run(training_op)
    best_theta = theta.eval()
    save_path = saver.save(sess, "/tmp/my_model_final.ckpt")
# Restoring a model is just as easy: you create a Saver at the end of the construction phase just like before,
# but then at the beginning of the execution phase, instead of initializing the variables using the init node,
# you call the restore() method of the Saver object:
with tf.Session() as sess:
    saver.restore(sess, "/tmp/my_model_final.ckpt")
    [...]
# By default, the save() method also saves the structure of the graph in a second file with the same name
# plus a .meta extension. You can load this graph structure using tf.train.import_meta_graph() . This
# adds the graph to the default graph, and returns a Saver instance that you can then use to restore the
# graph’s state (i.e., the variable values):
saver = tf.train.import_meta_graph("/tmp/my_model_final.ckpt.meta")
with tf.Session() as sess:
    saver.restore(sess, "/tmp/my_model_final.ckpt")
    [...]
#     This allows you to fully restore a saved model, including both the graph structure and the variable values,
# without having to search for the code that built it.

In [None]:
# Visualizing the Graph and Training Curves Using TensorBoard

# TensorBoard. If you feed it some training stats, it will display nice interactive visualizations of
# these stats in your web browser (e.g., learning curves).
# The first step is to tweak your program a bit so it writes the graph definition and some training stats — for
# example, the training error (MSE) — to a log directory that TensorBoard will read from. You need to use
# a different log directory every time you run your program, or else TensorBoard will merge stats from
# different runs, which will mess up the visualizations.
from datetime import datetime
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

# Next, add the following code at the very end of the construction phase:
mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [None]:
# The first line creates a node in the graph that will evaluate the MSE value and write it to a TensorBoard-
# compatible binary log string called a summary. The second line creates a FileWriter that you will use
# to write summaries to logfiles in the log directory. The first parameter indicates the path of the log
# directory (in this case something like tf_logs/run-20160906091959/, relative to the current directory).
# The second (optional) parameter is the graph you want to visualize. Upon creation, the FileWriter
# creates the log directory if it does not already exist (and its parent directories if needed), and writes the
# graph definition in a binary logfile called an events file.
# Next you need to update the execution phase to evaluate the mse_summary node regularly during training
# (e.g., every 10 mini-batches). This will output a summary that you can then write to the events file using
# the file_writer . Here is the updated code:
[...]
for batch_index in range(n_batches):
    X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
    if batch_index % 10 == 0:
        summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})
        step = epoch * n_batches + batch_index
        file_writer.add_summary(summary_str, step)
    sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

In [None]:
# Finally, you want to close the FileWriter at the end of the program:
file_writer.close()
# Now run this program: it will create the log directory and write an events file in this directory, containing
# both the graph definition and the MSE values. Open up a shell and go to your working directory, then type
# ls -l tf_logs/run* to list the contents of the log directory:
$ cd $ML_PATH
# Your ML working directory (e.g., $HOME/ml)
$ ls -l tf_logs/run*
# total 40
# -rw-r--r-- 1 ageron staff 18620 Sep 6 11:10 events.out.tfevents.1472553182.mymac
# If you run the program a second time, you should see a second directory in the tf_logs/ directory:
$ ls -l tf_logs/
# total 0
# drwxr-xr-x 3 ageron
# drwxr-xr-x 3 ageron
# staff
# staff
# 102 Sep
# 102 Sep
# 6 10:07 run-20160906091959
# 6 10:22 run-20160906092202
# Great! Now it’s time to fire up the TensorBoard server. You need to activate your virtualenv environment
# if you created one, then start the server by running the tensorboard command, pointing it to the root log
# directory. This starts the TensorBoard web server, listening on port 6006 (which is “goog” written upside
# down):
$ source env/bin/activate
$ tensorboard --logdir tf_logs/
Starting TensorBoard on port 6006
(You can navigate to http://0.0.0.0:6006)