In [1]:
import tensorflow as tf
print(tf.__version__)

# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

1.1.0


# Does it run?

In [2]:
x = tf.Variable(3, name='x')
y = tf.Variable(4, name='y')
f = x*x*y + y + 2

In [3]:
sess = tf.Session()
sess.run(x.initializer)
sess.run(y.initializer)
result = sess.run(f)
print(result)
sess.close()

42


In [4]:
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()
    print(result)

42


In [5]:
init = tf.global_variables_initializer() 

with tf.Session() as sess:
    init.run()
    result = f.eval()
    print(result)

42


In [6]:
sess = tf.InteractiveSession()
init.run()
result = f.eval()
print(result)
sess.close()

42


# Managing Graphs

In [7]:
x1 = tf.Variable(1)
x1.graph is tf.get_default_graph()

True

In [8]:
graph = tf.Graph()
with graph.as_default():
    x2 = tf.Variable(2)
x2.graph is graph

True

In [9]:
x2.graph is tf.get_default_graph()

False

In [10]:
graph

<tensorflow.python.framework.ops.Graph at 0x7f92d7f8aa50>

In [11]:
graph.get_operations()

[<tf.Operation 'Variable/initial_value' type=Const>,
 <tf.Operation 'Variable' type=VariableV2>,
 <tf.Operation 'Variable/Assign' type=Assign>,
 <tf.Operation 'Variable/read' type=Identity>]

In [12]:
tf.get_default_graph().get_operations()

[<tf.Operation 'x/initial_value' type=Const>,
 <tf.Operation 'x' type=VariableV2>,
 <tf.Operation 'x/Assign' type=Assign>,
 <tf.Operation 'x/read' type=Identity>,
 <tf.Operation 'y/initial_value' type=Const>,
 <tf.Operation 'y' type=VariableV2>,
 <tf.Operation 'y/Assign' type=Assign>,
 <tf.Operation 'y/read' type=Identity>,
 <tf.Operation 'mul' type=Mul>,
 <tf.Operation 'mul_1' type=Mul>,
 <tf.Operation 'add' type=Add>,
 <tf.Operation 'add_1/y' type=Const>,
 <tf.Operation 'add_1' type=Add>,
 <tf.Operation 'init' type=NoOp>,
 <tf.Operation 'Variable/initial_value' type=Const>,
 <tf.Operation 'Variable' type=VariableV2>,
 <tf.Operation 'Variable/Assign' type=Assign>,
 <tf.Operation 'Variable/read' type=Identity>]

In [13]:
tf.reset_default_graph()
tf.get_default_graph().get_operations()

[]

# Lifecycle of a Node Value

In [14]:
w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

In [15]:
# Inefficient. w and x are evaluated twice.
with tf.Session() as sess:
    print(y.eval())
    print(z.eval())

10
15


In [16]:
# Efficient. w and x are evaluated once and both values are still
# calculated.
with tf.Session() as sess:
    y_val, z_val = sess.run([y, z])
    print(y_val)
    print(z_val)

10
15


# Linear Regression

In [17]:
import numpy as np
from sklearn.datasets import fetch_california_housing

In [18]:
housing = fetch_california_housing()
m, n = housing.data.shape
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

In [19]:
X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')
XT = tf.transpose(X)
theta = tf.matmul(
            tf.matmul(
                tf.matrix_inverse(tf.matmul(XT, X)), 
                XT), 
            y)

with tf.Session() as sess:
    theta_val = theta.eval()

# Implementing Gradient Descent

In [20]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[
    np.ones((m, 1)), 
    scaled_housing_data
]
print(scaled_housing_data_plus_bias.mean(axis=0))
print(scaled_housing_data_plus_bias.mean(axis=1))
print(scaled_housing_data_plus_bias.mean())
print(scaled_housing_data_plus_bias.shape)

[  1.00000000e+00   6.60969987e-17   5.50808322e-18   6.60969987e-17
  -1.06030602e-16  -1.10161664e-17   3.44255201e-18  -1.07958431e-15
  -8.52651283e-15]
[ 0.38915536  0.36424355  0.5116157  ..., -0.06612179 -0.06360587
  0.01359031]
0.111111111111
(20640, 9)


In [21]:
n_epochs = 1000
learning_rate = 0.01
momentum = 0.9

In [22]:
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')

theta = tf.Variable(tf.random_uniform([n+1, 1], -1, 1), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')

error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')

#Manual Differation
# gradients = 2/m * tf.matmul(tf.transpose(X), error)
#Reverse Autodiff
# gradients = tf.gradients(mse, [theta])[0]
# training_op = tf.assign(theta, theta - learning_rate*gradients)
#Optimizer for Gradient Descent
# optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
#Optimizer with Momentum
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                       momentum=momentum)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

In [23]:
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in xrange(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()

Epoch 0 MSE = 6.49314
Epoch 100 MSE = 0.54165
Epoch 200 MSE = 0.525592
Epoch 300 MSE = 0.524472
Epoch 400 MSE = 0.52434
Epoch 500 MSE = 0.524324
Epoch 600 MSE = 0.524321
Epoch 700 MSE = 0.524321
Epoch 800 MSE = 0.524321
Epoch 900 MSE = 0.524321


In [24]:
best_theta

array([[ 2.06855798],
       [ 0.82963139],
       [ 0.11875387],
       [-0.26554951],
       [ 0.30571494],
       [-0.00450234],
       [-0.03932671],
       [-0.89985842],
       [-0.87051511]], dtype=float32)

# Feeding Data

In [25]:
A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5
with tf.Session() as sess:
    B_val1 = B.eval(feed_dict={A: [[1, 2, 3]]})
    B_val2 = B.eval(feed_dict={A: [[4,5,6], [7,8,9]]})

In [26]:
print(B_val1)
print(B_val2)

[[ 6.  7.  8.]]
[[  9.  10.  11.]
 [ 12.  13.  14.]]


In [27]:
X = tf.placeholder(tf.float32, shape=(None, n+1), name='X')
y = tf.placeholder(tf.float32, shape=(None, 1), name='y')
n_epochs = 10
batch_size = 100
n_batches = int(np.ceil(m/batch_size))

In [28]:
def fetch_batch(epoch, batch_index, batch_size):
    np.random.seed(epoch*n_batches+batch_size)
    indices = np.random.randint(m, size=batch_size)
    X_batch = scaled_housing_data_plus_bias[indices]
    y_batch = housing.target.reshape(-1, 1)[indices]
    return X_batch, y_batch

In [29]:
with tf.Session() as sess:
    sess.run(init)
    for epoch in xrange(n_epochs):
        for batch_index in xrange(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            sess.run(training_op, feed_dict={X:X_batch, y:y_batch})
    best_theta = theta.eval()

In [30]:
best_theta

array([[ 2.06855798],
       [ 0.82962054],
       [ 0.11875188],
       [-0.26552895],
       [ 0.30569792],
       [-0.00450293],
       [-0.03932633],
       [-0.89988279],
       [-0.87053829]], dtype=float32)

# Saving and Restoring Models

In [31]:
reset_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())                                # not shown
            save_path = saver.save(sess, "/tmp/my_model.ckpt")
        sess.run(training_op)
    
    best_theta = theta.eval()
    save_path = saver.save(sess, "/tmp/my_model_final.ckpt")

Epoch 0 MSE = 9.16154
Epoch 100 MSE = 0.714501
Epoch 200 MSE = 0.566705
Epoch 300 MSE = 0.555572
Epoch 400 MSE = 0.548812
Epoch 500 MSE = 0.543636
Epoch 600 MSE = 0.539629
Epoch 700 MSE = 0.536509
Epoch 800 MSE = 0.534068
Epoch 900 MSE = 0.532147


In [32]:
with tf.Session() as sess:
    saver.restore(sess, "/tmp/my_model_final.ckpt")
    best_theta_restored = theta.eval()

INFO:tensorflow:Restoring parameters from /tmp/my_model_final.ckpt


In [33]:
np.allclose(best_theta, best_theta_restored)

True

In [34]:
best_theta

array([[ 2.06855249],
       [ 0.88740271],
       [ 0.14401658],
       [-0.34770882],
       [ 0.36178368],
       [ 0.00393811],
       [-0.04269556],
       [-0.66145277],
       [-0.6375277 ]], dtype=float32)

In [35]:
best_theta_restored

array([[ 2.06855249],
       [ 0.88740271],
       [ 0.14401658],
       [-0.34770882],
       [ 0.36178368],
       [ 0.00393811],
       [-0.04269556],
       [-0.66145277],
       [-0.6375277 ]], dtype=float32)

# Visualizing using TensorBoard

In [36]:
from IPython.display import clear_output, Image, display, HTML

def strip_consts(graph_def, max_const_size=32):
    """Strip large constant values from graph_def."""
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add() 
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                tensor.tensor_content = b"<stripped %d bytes>"%size
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Visualize TensorFlow graph."""
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    strip_def = strip_consts(graph_def, max_const_size=max_const_size)
    code = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))

    iframe = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """.format(code.replace('"', '&quot;'))
    display(HTML(iframe))

In [37]:
show_graph(tf.get_default_graph())

In [38]:
reset_graph()

from datetime import datetime

now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}".format(root_logdir, now)

In [39]:
n_epochs = 1000
learning_rate = 0.01

X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

In [40]:
mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [41]:
n_epochs = 10
batch_size = 100
n_batches = int(np.ceil(m / batch_size))

In [42]:
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            if batch_index % 10 == 0:
                summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})
                step = epoch * n_batches + batch_index
                file_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
    best_theta = theta.eval()

In [43]:
file_writer.close()

In [44]:
best_theta

array([[ 2.1242497 ],
       [ 0.90131176],
       [ 0.2054878 ],
       [-0.1690098 ],
       [ 0.48434627],
       [-0.11807442],
       [-0.4183332 ],
       [-0.93492216],
       [-0.86516005]], dtype=float32)

# Name Scopes

In [45]:
reset_graph()

now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

n_epochs = 1000
learning_rate = 0.01

X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")

In [46]:
with tf.name_scope("loss"):
    error = y_pred - y
    mse = tf.reduce_mean(tf.square(error), name="mse")

In [50]:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [52]:
n_epochs = 10
batch_size = 100
n_batches = int(np.ceil(m / batch_size))

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            if batch_index % 10 == 0:
                summary_str = mse_summary.eval(feed_dict={X:X_batch, y:y_batch})
                step = epoch * n_batches + batch_index
                file_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict={X:X_batch, y:y_batch})
    best_theta = theta.eval()

file_writer.flush()
file_writer.close()
print("Best theta:", best_theta)

Best theta: [[ 2.1242497 ]
 [ 0.90131176]
 [ 0.2054878 ]
 [-0.1690098 ]
 [ 0.48434627]
 [-0.11807442]
 [-0.4183332 ]
 [-0.93492216]
 [-0.86516005]]


In [53]:
print(error.op.name)
print(mse.op.name)

loss/sub
loss/mse


In [54]:
show_graph(tf.get_default_graph())

# Modularity
This is ugly flat code

In [56]:
reset_graph()

# Bad
n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")

w1 = tf.Variable(tf.random_normal((n_features, 1)), name="weight1")
w2 = tf.Variable(tf.random_normal((n_features, 1)), name="weight2")
b1 = tf.Variable(0.0, name="bias1")
b2 = tf.Variable(0.0, name="bias2")

z1 = tf.add(tf.matmul(X, w1), b1, name="z1")
z2 = tf.add(tf.matmul(X, w2), b2, name="z2")

relu1 = tf.maximum(0.0, z1, name="relu1")
relu2 = tf.maximum(0.0, z2, name="relu2")

output = tf.add(relu1, relu2, name="output")

This is better, modularized and has name_scopes!

In [60]:
reset_graph()
def relu(X):
    with tf.name_scope("relu"):
        w_shape = (int(X.get_shape()[1]), 1)
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X, w), b, name="z")
        return tf.maximum(z, 0.0, name="max")

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name="output")