# Up and Running with TensorFlow

TensorFlow's basic principle is simple: you first define, in Python, a graph of computations to perform, and then TensorFlow takes that graph and runs it efficiently using optimized C++ code.
It is possible to break up the graph into several chunks and run them in parallel across multiple CPUs or GPUs.

# Create and Run a Graph in a Session

In [2]:
import os

import tensorflow as tf

# Graph construction only: two named variables and a node f that combines them.
# Nothing is evaluated here; f is run inside a session in the following cells.
x = tf.Variable(3, name='x')
y = tf.Variable(4, name='y')
f = x*x*y + y + 2

In [3]:
# Open a session by hand, initialize each variable individually, then run f.
# The session is left open here and is closed in the next cell.
sess = tf.Session()
sess.run(x.initializer)
sess.run(y.initializer)

result = sess.run(f)

print(result)

42


In [4]:
# Close the session that was opened manually in the previous cell.
sess.close()

In [5]:
# Same computation using the session as a context manager: inside the block,
# run() and eval() are called without passing a session explicitly.
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()

print(result)

42


In [6]:
# A single init op replaces the per-variable initializer calls used above.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run() # initialize all the variables
    result = f.eval()
    print(result)

42


# Managing Graphs

In [7]:
# A variable created without naming a graph belongs to the default graph
# (the expression below evaluates to True).
x1 = tf.Variable(1)
x1.graph is tf.get_default_graph()

True

In [8]:
# Create an independent graph and make it the default only inside the
# `with` block, so x2 is added to `graph`.
graph = tf.Graph()
with graph.as_default():
    x2 = tf.Variable(2)

In [9]:
# x2 was created inside graph.as_default(), so it belongs to `graph`.
x2.graph is graph

True

In [10]:
# ...and it is not part of the default graph that is active outside that block.
x2.graph is tf.get_default_graph()

False

# Lifecycle of a Node Value

In [11]:
# Small dependency chain: x depends on w, and both y and z depend on x.
w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

In [12]:
# Fetch y and z with two separate eval() calls.
with tf.Session() as sess:
    print(y.eval())
    print(z.eval())

10
15


In [13]:
# Fetch y and z together with a single sess.run() call.
with tf.Session() as sess:
    y_val, z_val = sess.run([y, z])
    print(y_val)
    print(z_val)

10
15


# Linear Regression with TensorFlow

In [14]:
import numpy as np
from sklearn.datasets import fetch_california_housing

# Load the California housing data; m is the number of rows and n the number
# of columns of the feature matrix.
housing = fetch_california_housing()
m, n = housing.data.shape
# Prepend a column of ones so the first entry of theta acts as the bias term.
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]


X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name='X')
# Targets are reshaped into a single column to match the matrix products below.
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name='y')

XT = tf.transpose(X)
# Normal equation: theta = (X^T X)^-1 X^T y
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT,X)), XT), y)

In [15]:
# Evaluate the closed-form solution and print the resulting theta column.
with tf.Session() as sess:
    theta_value = theta.eval()
    print(theta_value)

[[ -3.74651413e+01]
 [  4.35734153e-01]
 [  9.33829229e-03]
 [ -1.06622010e-01]
 [  6.44106984e-01]
 [ -4.25131839e-06]
 [ -3.77322501e-03]
 [ -4.26648885e-01]
 [ -4.40514028e-01]]


# Implementing Gradient Descent

In [16]:
from sklearn.preprocessing import StandardScaler
# Standardize the features before running gradient descent, then prepend the
# bias column of ones.
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name='y')
# theta starts at uniform random values in [-1, 1), one row per feature plus bias.
theta = tf.Variable(tf.random_uniform([n+1,1], -1.0, 1.0), name='theta')

y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y

mse = tf.reduce_mean(tf.square(error), name='mse')
# Hand-written gradient of the MSE with respect to theta: (2/m) * X^T (X theta - y)
gradients = 2/m * tf.matmul(tf.transpose(X), error)

# One gradient-descent step: theta <- theta - learning_rate * gradients
training_op = tf.assign(theta, theta - learning_rate*gradients)

init = tf.global_variables_initializer()

In [17]:
# Run n_epochs update steps, printing the MSE every 500 epochs, then read
# back and print the final theta.
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch%500 == 0:
            print('Epoch:', epoch, 'MSE:', mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
    print(best_theta)

Epoch: 0 MSE: 7.90323
Epoch: 500 MSE: 0.545248
[[ 2.06855249]
 [ 0.8029449 ]
 [ 0.13881755]
 [-0.16824435]
 [ 0.20463267]
 [ 0.00314262]
 [-0.04045486]
 [-0.77865618]
 [-0.7439388 ]]


# Using autodiff
Can you find the equations to compute the partial derivatives of the following function with regard to `a` and `b`?

In [18]:
def my_func(a, b):
    """Iterate a nonlinear recurrence 100 times and return the final value.

    Starting from z = 0, each step i in 0..99 replaces z with
    ``a*cos(z + i) + z*sin(b - i)``. The function is a plain NumPy reference
    for the TensorFlow autodiff example that follows.

    Args:
        a: Coefficient of the cosine term.
        b: Offset inside the sine term.

    Returns:
        The value of z after the 100th step.
    """
    z = 0
    for step in range(100):
        cos_term = a * np.cos(z + step)
        sin_term = z * np.sin(b - step)
        z = cos_term + sin_term
    return z

In [19]:
# Evaluate the NumPy version at a=0.2, b=0.3 as a reference value.
my_func(0.2, 0.3)

-0.21253923284754916

In [20]:
# The same recurrence as my_func, built from TensorFlow ops.
a = tf.Variable(0.2, name='a')
b = tf.Variable(0.3, name='b')
z = tf.Variable(0.0, name='z')

# z starts as a Variable and is rebound to the newest tensor on every
# iteration, chaining 100 steps into the graph.
for i in range(100):
    z = a*tf.cos(z + i) + z*tf.sin(b - i)

# Ask TensorFlow for the gradients of the final z with respect to a and b.
grads = tf.gradients(z, [a, b])
init = tf.global_variables_initializer()

In [21]:
# Print the value of z (compare with the my_func result above) followed by
# the two gradients.
with tf.Session() as sess:
    init.run()
    print(z.eval())
    print(sess.run(grads))

-0.212537
[-1.1388494, 0.19671395]


In [22]:
# Same linear-regression graph as the manual gradient-descent example; only
# the way the gradients are obtained differs.
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name='y')
theta = tf.Variable(tf.random_uniform([n+1,1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')

gradients = tf.gradients(mse, [theta])[0] # changed: autodiff replaces the hand-written formula

In [23]:
# The update step is still written by hand with tf.assign, using the
# gradients tensor defined in the previous cell.
training_op = tf.assign(theta, theta - learning_rate*gradients)
init = tf.global_variables_initializer()

# Train for n_epochs steps, printing the MSE every 100 epochs.
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch%100 == 0:
            print('Epoch:', epoch, 'MSE:', mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
    print(best_theta)

Epoch: 0 MSE: 3.50426
Epoch: 100 MSE: 0.685798
Epoch: 200 MSE: 0.615917
Epoch: 300 MSE: 0.592334
Epoch: 400 MSE: 0.575408
Epoch: 500 MSE: 0.562869
Epoch: 600 MSE: 0.553542
Epoch: 700 MSE: 0.546578
Epoch: 800 MSE: 0.541358
Epoch: 900 MSE: 0.537429
[[ 2.06855249]
 [ 0.8683545 ]
 [ 0.1538965 ]
 [-0.28656098]
 [ 0.30015546]
 [ 0.00777949]
 [-0.0431252 ]
 [-0.60731763]
 [-0.57995838]]


# Using a GradientDescentOptimizer

In [24]:
# Rebuild the linear-regression graph (data constants, random theta,
# predictions, error and MSE) for the optimizer-based example.
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name='y')
theta = tf.Variable(tf.random_uniform([n+1,1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')

In [25]:
# The optimizer's minimize() op takes the place of the explicit gradients
# tensor and tf.assign update used in the earlier cells.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

In [26]:
init = tf.global_variables_initializer()

# Train for n_epochs steps, printing the MSE every 100 epochs, then print
# the final theta.
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch%100 == 0:
            print('Epoch:', epoch, 'MSE:', mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
    print(best_theta)

Epoch: 0 MSE: 4.70856
Epoch: 100 MSE: 0.771987
Epoch: 200 MSE: 0.680369
Epoch: 300 MSE: 0.641908
Epoch: 400 MSE: 0.613812
Epoch: 500 MSE: 0.59278
Epoch: 600 MSE: 0.576957
Epoch: 700 MSE: 0.565002
Epoch: 800 MSE: 0.555926
Epoch: 900 MSE: 0.549002
[[ 2.06855249]
 [ 0.90034163]
 [ 0.16632918]
 [-0.33495927]
 [ 0.33488339]
 [ 0.0118872 ]
 [-0.04485895]
 [-0.48665121]
 [-0.46243271]]


# Using a Momentum optimizer

In [27]:
# Same model and training loop as the GradientDescentOptimizer example; only
# the optimizer class changes.
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name='y')
theta = tf.Variable(tf.random_uniform([n+1,1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')

# Momentum optimizer with the same learning rate and momentum=0.9.
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                      momentum=0.9)
training_op = optimizer.minimize(mse)


init = tf.global_variables_initializer()

# Train for n_epochs steps, printing the MSE every 100 epochs.
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch%100 == 0:
            print('Epoch:', epoch, 'MSE:', mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
    print(best_theta)

Epoch: 0 MSE: 6.22632
Epoch: 100 MSE: 0.528012
Epoch: 200 MSE: 0.524617
Epoch: 300 MSE: 0.524357
Epoch: 400 MSE: 0.524326
Epoch: 500 MSE: 0.524322
Epoch: 600 MSE: 0.524321
Epoch: 700 MSE: 0.524321
Epoch: 800 MSE: 0.524321
Epoch: 900 MSE: 0.524321
[[ 2.06855774]
 [ 0.82962537]
 [ 0.11875279]
 [-0.26553825]
 [ 0.30570567]
 [-0.00450268]
 [-0.03932649]
 [-0.89987195]
 [-0.87052804]]


# Feeding Data to the Training Algorithm

In [28]:
# A has no value of its own; data is supplied through feed_dict when B is
# evaluated. The first dimension is None, and the two feeds below supply
# one row and two rows respectively.
A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5

with tf.Session() as sess:
    b1 = B.eval(feed_dict={A: [[1,2,3]]})
    b2 = B.eval(feed_dict={A: [[4,5,6], [7,8,9]]})

print(b1)
print(b2)

[[ 6.  7.  8.]]
[[  9.  10.  11.]
 [ 12.  13.  14.]]


## Mini-Batch Gradient Descent

In [29]:
m, n = housing.data.shape

# X and y become placeholders so that each training step can be fed a
# different mini-batch.
X = tf.placeholder(tf.float32, shape=(None, n + 1), name='X')
y = tf.placeholder(tf.float32, shape=(None, 1), name='y')


batch_size = 128
# Number of mini-batches per epoch: m / batch_size, rounded up.
n_batches = int(np.ceil(m / batch_size))

In [30]:
# Model, loss and optimizer for the mini-batch example, built on the X and y
# placeholders defined in the previous cell.
theta = tf.Variable(tf.random_uniform([n+1,1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')

# NOTE(review): learning_rate is passed in here, so its value comes from an
# earlier cell; the `learning_rate = 0.01` assignment two cells further down
# runs after this optimizer has already been constructed.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)

training_op = optimizer.minimize(mse)


init = tf.global_variables_initializer()

In [31]:
def fetch_batch(epoch, batch_index, batch_size):
    """Draw one reproducible random mini-batch from the scaled housing data.

    The row indices are drawn from a generator seeded with
    ``epoch * n_batches + batch_index``, so the same (epoch, batch_index)
    pair yields the same batch as long as ``n_batches`` is unchanged.
    Indices are sampled with ``randint``, i.e. with replacement, so a row
    may appear more than once in a batch.

    Reads the notebook globals ``n_batches``, ``m``,
    ``scaled_housing_data_plus_bias`` and ``housing``.

    Args:
        epoch: Index of the current epoch.
        batch_index: Index of the batch within the epoch.
        batch_size: Number of rows to draw.

    Returns:
        A tuple ``(X_batch, y_batch)``: the selected rows of the scaled
        feature matrix (with bias column) and the matching targets as a
        column of shape ``(batch_size, 1)``.
    """
    # A private RandomState produces the same draws as seeding the global
    # NumPy generator with this value, but leaves the global random state
    # (and any seed the user set elsewhere) untouched.
    rng = np.random.RandomState(epoch * n_batches + batch_index)
    indices = rng.randint(m, size=batch_size)
    X_batch = scaled_housing_data_plus_bias[indices]
    y_batch = housing.target.reshape(-1, 1)[indices]
    return X_batch, y_batch

In [32]:
# n_epochs drives the training loop in the next cell. The optimizer was
# already constructed in an earlier cell, so this learning_rate assignment
# does not change the optimizer that is used below.
n_epochs = 10
learning_rate = 0.01

In [33]:
# For every epoch, run one training step per mini-batch, feeding the batch
# into the X and y placeholders.
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

    best_theta = theta.eval()

In [34]:
# Show the theta obtained by mini-batch gradient descent.
print('Best theta\n', best_theta)

Best theta
 [[ 2.06649661]
 [ 0.77800006]
 [ 0.12371483]
 [-0.13735965]
 [ 0.24040698]
 [-0.00584436]
 [-0.04645694]
 [-0.90367991]
 [-0.85554904]]


# Saving and restoring a model

In [35]:
# Start from a fresh default graph, then rebuild the batch gradient-descent
# model used earlier.
tf.reset_default_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name='y')
theta = tf.Variable(tf.random_uniform([n+1,1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)


init = tf.global_variables_initializer()
# The Saver is created after the graph is built; it is used below to write
# the checkpoint and later to restore it.
saver = tf.train.Saver()

# Make sure the checkpoint directory exists before saving, so the cell also
# runs from a fresh checkout where ./tmp has not been created yet.
os.makedirs('./tmp', exist_ok=True)

# Train for n_epochs steps, printing the MSE every 100 epochs, then write the
# final variable values to a checkpoint.
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch%100 == 0:
            print('Epoch:', epoch, 'MSE:', mse.eval())
            # An intermediate checkpoint could also be written here with
            # saver.save(); only the final model is saved in this example.
        sess.run(training_op)
    best_theta = theta.eval()
    save_path = saver.save(sess, './tmp/my_model_final.ckpt')

print(best_theta)

Epoch: 0 MSE: 9.05204
Epoch: 100 MSE: 0.837254
Epoch: 200 MSE: 0.67758
Epoch: 300 MSE: 0.633802
Epoch: 400 MSE: 0.603527
Epoch: 500 MSE: 0.581677
Epoch: 600 MSE: 0.565881
Epoch: 700 MSE: 0.554455
Epoch: 800 MSE: 0.546188
Epoch: 900 MSE: 0.540203
[[ 2.06855249]
 [ 0.73839146]
 [ 0.14287852]
 [-0.01604763]
 [ 0.06548349]
 [ 0.00545796]
 [-0.03942202]
 [-0.80967194]
 [-0.76597875]]


In [36]:
# Restore the saved variables into a new session instead of running the init
# op, then read theta back for comparison with the values printed above.
with tf.Session() as sess:
    saver.restore(sess, "./tmp/my_model_final.ckpt")
    best_theta_restored = theta.eval()
best_theta_restored

INFO:tensorflow:Restoring parameters from ./tmp/my_model_final.ckpt


array([[ 2.06855249],
       [ 0.73839146],
       [ 0.14287852],
       [-0.01604763],
       [ 0.06548349],
       [ 0.00545796],
       [-0.03942202],
       [-0.80967194],
       [-0.76597875]], dtype=float32)

In [37]:
# Start from an empty default graph so that the graph comes from the saved
# .meta file rather than from the Python code above.
tf.reset_default_graph()

saver = tf.train.import_meta_graph("./tmp/my_model_final.ckpt.meta")  # this loads the graph structure
# Look up the theta tensor by name in the imported graph.
theta = tf.get_default_graph().get_tensor_by_name("theta:0") # not shown in the book

with tf.Session() as sess:
    saver.restore(sess, "./tmp/my_model_final.ckpt")  # this restores the graph's state
    best_theta_restored = theta.eval()

INFO:tensorflow:Restoring parameters from ./tmp/my_model_final.ckpt


# Visualizing the Graph

In [38]:
from IPython.display import clear_output, Image, display, HTML

def strip_consts(graph_def, max_const_size=32):
    """Copy ``graph_def``, replacing the content of large constants.

    Every node of ``graph_def`` is merged into a new ``tf.GraphDef``. For
    nodes whose op is ``'Const'``, a ``tensor_content`` longer than
    ``max_const_size`` bytes is replaced by a short placeholder that records
    the original byte count.

    Args:
        graph_def: Object exposing a ``node`` collection to copy from.
        max_const_size: Largest ``tensor_content`` length (in bytes) that is
            kept as is.

    Returns:
        The new ``tf.GraphDef`` holding the copied (and possibly stripped) nodes.
    """
    slimmed = tf.GraphDef()
    for source_node in graph_def.node:
        node_copy = slimmed.node.add()
        node_copy.MergeFrom(source_node)
        if node_copy.op != 'Const':
            continue
        payload = node_copy.attr['value'].tensor
        n_bytes = len(payload.tensor_content)
        if n_bytes > max_const_size:
            payload.tensor_content = b"<stripped %d bytes>" % n_bytes
    return slimmed

def show_graph(graph_def, max_const_size=32):
    """Display a TensorFlow graph inline in the notebook.

    The graph is stripped of large constants with ``strip_consts``, embedded
    as text into an HTML snippet that loads the ``tf-graph-basic`` component
    from tensorboard.appspot.com, and shown inside an iframe.

    Args:
        graph_def: A graph definition, or any object with an
            ``as_graph_def()`` method (which is then called to obtain one).
        max_const_size: Forwarded to ``strip_consts``.
    """
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    slim_def = strip_consts(graph_def, max_const_size=max_const_size)

    # Text form of the graph, quoted as a literal for the script below.
    pbtxt_literal = repr(str(slim_def))
    # Random suffix for the element id used by the script and the component.
    element_id = 'graph'+str(np.random.rand())
    widget_html = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=pbtxt_literal, id=element_id)

    # Double quotes are escaped because the snippet is placed inside the
    # double-quoted srcdoc attribute of the iframe.
    escaped_html = widget_html.replace('"', '&quot;')
    iframe_html = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """.format(escaped_html)
    display(HTML(iframe_html))

In [39]:
# Render the current default graph inline.
show_graph(tf.get_default_graph())

# Using TensorBoard

In [40]:
tf.reset_default_graph()

from datetime import datetime

# Build a log directory name from the current UTC timestamp so that every
# run gets its own subfolder under tf_logs.
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

In [41]:
# NOTE(review): this n_epochs value is overwritten by `n_epochs = 10` in a
# later cell before the training loop runs.
n_epochs = 5000
learning_rate = 0.01

# Mini-batch model on placeholders; theta is initialized with a fixed op
# seed (seed=16).
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=16), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

In [42]:
# Scalar summary op for the MSE, and a writer that targets logdir and is
# given the default graph.
mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [43]:
# Settings actually used by the training loop below; n_batches is recomputed
# for the new batch size and is also read by fetch_batch.
n_epochs = 10
batch_size = 100
n_batches = int(np.ceil(m / batch_size))

In [44]:
# Mini-batch training with logging: on every 10th batch, the MSE summary is
# evaluated on that batch and written with a step index that increases
# across epochs.
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            if batch_index % 10 == 0:
                summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})
                step = epoch * n_batches + batch_index
                file_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

    best_theta = theta.eval()

In [45]:
# Close the summary writer, then display the trained theta.
file_writer.close()
best_theta

array([[ 2.07029438],
       [ 0.85185266],
       [ 0.1188163 ],
       [-0.29350281],
       [ 0.37137547],
       [ 0.00318078],
       [-0.01125147],
       [-0.87467641],
       [-0.842489  ]], dtype=float32)

In [46]:
# To inspect the logs, run the following from a shell (not from this notebook):
# source env/bin/activate
# tensorboard --logdir tf_logs/

# Name Scopes

In [47]:
# Recreate error and mse inside a name scope called 'loss'; the op names
# printed in the next cells carry the 'loss/' prefix.
with tf.name_scope('loss') as scope:
    error = y_pred - y
    mse = tf.reduce_mean(tf.square(error), name='mse')

In [48]:
# Name of the subtraction op created inside the 'loss' scope.
print(error.op.name)

loss/sub


In [49]:
# Name of the mse op created inside the 'loss' scope.
print(mse.op.name)

loss/mse


In [50]:
show_graph(tf.get_default_graph()) # check loss scope

# Modularity

In [52]:
# Deliberately repetitive version: two ReLU units written out by hand, each
# with its own weights and bias, whose outputs are added together.
tf.reset_default_graph()


n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")

w1 = tf.Variable(tf.random_normal((n_features, 1)), name="weights1")
w2 = tf.Variable(tf.random_normal((n_features, 1)), name="weights2")
b1 = tf.Variable(0.0, name="bias1")
b2 = tf.Variable(0.0, name="bias2")

z1 = tf.add(tf.matmul(X, w1), b1, name="z1")
z2 = tf.add(tf.matmul(X, w2), b2, name="z2")

relu1 = tf.maximum(z1, 0., name="relu1")
relu2 = tf.maximum(z1, 0., name="relu2")  # Oops, cut&paste error! Did you spot it?

output = tf.add(relu1, relu2, name="output")

In [55]:
# Reset the default graph before building the function-based version.
tf.reset_default_graph()

def relu(X):
    """Add one ReLU unit on top of ``X`` to the current graph.

    Inside a ``"relu"`` name scope, creates a weight column (random normal,
    one row per column of ``X``) and a scalar bias initialized to 0.0,
    computes ``X @ weights + bias`` and returns its element-wise maximum
    with 0.

    Args:
        X: Input tensor; the size of its second dimension must be known,
            because it determines the number of weight rows.

    Returns:
        The output tensor of the ``maximum`` op.
    """
    with tf.name_scope("relu"):
        n_inputs = int(X.get_shape()[1])
        weights = tf.Variable(tf.random_normal((n_inputs, 1)), name="weights")
        bias = tf.Variable(0.0, name="bias")
        linear = tf.add(tf.matmul(X, weights), bias, name="z")
        return tf.maximum(linear, 0., name="max")

In [56]:
# Build five ReLU units on the same input with the relu() helper and sum
# their outputs.
n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name="output")

# Write the graph to logs/relu2, then close the writer right away.
file_writer = tf.summary.FileWriter("logs/relu2", tf.get_default_graph())
file_writer.close()

In [57]:
show_graph(tf.get_default_graph()) # check relu scope