# Creating Your First Graph and Running It in a Session

First graph and four different ways of execution. Also learning how to set a default session (last execution).

In [1]:
%load_ext autotime

In [2]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
import os
from datetime import datetime

time: 1.35 s


In [3]:
# Creating the graph
x = tf.Variable(3, name='x')
y = tf.Variable(4, name='y')
f = x*x*y + y + 2

Instructions for updating:
Colocations handled automatically by placer.
time: 35.8 ms


In [4]:
sess = tf.Session()
sess.run(x.initializer)
sess.run(y.initializer)
result = sess.run(f)
print(result)
sess.close()

42
time: 71.3 ms


In [5]:
result = 0
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()
print(result)

42
time: 39 ms


In [6]:
result = 0
init = tf.global_variables_initializer()
with tf.Session() as sess:
    init.run()
    result = f.eval()
print(result)

42
time: 155 ms


In [7]:
result = 0
init = tf.global_variables_initializer()
# This will set 'sess' as my default session
sess = tf.InteractiveSession()
init.run()
result = f.eval()
print(result)
sess.close()

42
time: 79.8 ms


# Managing Graphs

In [8]:
x1 = tf.Variable(1)
x1.graph is tf.get_default_graph()

True

time: 104 ms


In [9]:
x2 = tf.Variable(2)
x2.graph is tf.get_default_graph()

True

time: 127 ms


In [10]:
graph = tf.Graph()
# Setting 'graph' as the default graph
with graph.as_default():
    x2 = tf.Variable(2)
    print(x2.graph is graph)
    print(x2.graph is tf.get_default_graph())
print(x2.graph is graph)
print(x2.graph is tf.get_default_graph())

True
True
True
False
time: 101 ms


# Lifecycle of a Node Value

Running a graph causes TensorFlow to run all the dependencies associated with the graph (w and x on the example). After the run, the dependencies will be dropped. If you have two graphs that have the same dependencies there is the option to run them both in one only graph.

In [11]:
%%time

w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

with tf.Session() as sess:
    print(y.eval())
    print(z.eval())

10
15
CPU times: user 11.3 ms, sys: 4.54 ms, total: 15.8 ms
Wall time: 14.2 ms
time: 53.6 ms


In [12]:
%%time

w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

with tf.Session() as sess:
    y_val, z_val = sess.run([y, z])
    print(y_val)
    print(z_val)

10
15
CPU times: user 14.6 ms, sys: 0 ns, total: 14.6 ms
Wall time: 13.1 ms
time: 87.8 ms


### Replicating for $10 ^ 4$ runs

In [13]:
%%time

w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

with tf.Session() as sess:
    for i in range(10**4):
        y_val = y.eval()
        z_val = z.eval()

CPU times: user 2.63 s, sys: 134 ms, total: 2.76 s
Wall time: 1.82 s
time: 1.87 s


In [14]:
%%time

w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

with tf.Session() as sess:
    for i in range(10**4):
        y_val, z_val = sess.run([y, z])

CPU times: user 1.58 s, sys: 91.8 ms, total: 1.67 s
Wall time: 1.22 s
time: 1.23 s


# Linear Regression with TensorFlow
## Adjusting linear regression by Normal Equation: 
$ \theta = (X^T X)^{-1} X^T y$

In [15]:
housing = fetch_california_housing()
m, n = housing.data.shape

housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")

XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

with tf.Session() as sess:
    theta_value = theta.eval()
    
    print("MSE =", mse.eval())

MSE = 0.52434915
time: 46.9 ms


## Implementing Gradient Descent

Adjusting linear regression by gradient descent: $ \theta^{i+1} = \theta - \eta \nabla_{\theta}MSE(\theta) $

### Manualy computing the gradients
$ MSE = \frac{1}{M}\sum_{n=1}^{M} (\hat{y_{n}} - y_{n})^2$

$ \hat{y} = X \theta$

$\nabla_{\theta}MSE(\theta) = \frac{2}{M} X^T (\hat{y} - y)$

In [16]:
scaler = StandardScaler()

time: 22.6 ms


In [17]:
%%time
n_epochs = 1001
learning_rate = 0.01

scaled_housing_data_plus_bias = housing_data_plus_bias
scaled_housing_data_plus_bias[:, 1:] = scaler.fit_transform(housing_data_plus_bias[:, 1:])

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer() 
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()

Epoch 0 MSE = 14.243605
Epoch 100 MSE = 0.73185533
Epoch 200 MSE = 0.5386133
Epoch 300 MSE = 0.5333081
Epoch 400 MSE = 0.53155
Epoch 500 MSE = 0.53018916
Epoch 600 MSE = 0.5290989
Epoch 700 MSE = 0.52822185
Epoch 800 MSE = 0.5275132
Epoch 900 MSE = 0.52693903
Epoch 1000 MSE = 0.52647233
CPU times: user 463 ms, sys: 6.99 ms, total: 470 ms
Wall time: 377 ms
time: 445 ms


### Using autodiff to compute the grandients

In [18]:
%%time
n_epochs = 1001
learning_rate = 0.01

scaled_housing_data_plus_bias = housing_data_plus_bias
scaled_housing_data_plus_bias[:, 1:] = scaler.fit_transform(housing_data_plus_bias[:, 1:])

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer() 
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()

Epoch 0 MSE = 12.731786
Epoch 100 MSE = 0.8906498
Epoch 200 MSE = 0.64897776
Epoch 300 MSE = 0.6135527
Epoch 400 MSE = 0.5918469
Epoch 500 MSE = 0.5758504
Epoch 600 MSE = 0.5638534
Epoch 700 MSE = 0.55480635
Epoch 800 MSE = 0.5479522
Epoch 900 MSE = 0.54273427
Epoch 1000 MSE = 0.5387426
CPU times: user 530 ms, sys: 26.5 ms, total: 556 ms
Wall time: 491 ms
time: 493 ms


### Using an optimizer

Gradient Descent Optimizer

In [19]:
%%time
n_epochs = 1001
learning_rate = 0.01

scaled_housing_data_plus_bias = housing_data_plus_bias
scaled_housing_data_plus_bias[:, 1:] = scaler.fit_transform(housing_data_plus_bias[:, 1:])

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer() 
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()

Epoch 0 MSE = 6.71938
Epoch 100 MSE = 1.0184149
Epoch 200 MSE = 0.81773955
Epoch 300 MSE = 0.73550314
Epoch 400 MSE = 0.6771615
Epoch 500 MSE = 0.6350104
Epoch 600 MSE = 0.6045326
Epoch 700 MSE = 0.5824868
Epoch 800 MSE = 0.5665334
Epoch 900 MSE = 0.5549832
Epoch 1000 MSE = 0.54661614
CPU times: user 384 ms, sys: 20.8 ms, total: 405 ms
Wall time: 310 ms
time: 312 ms


Momentum Optimizer

In [20]:
%%time
n_epochs = 1001
learning_rate = 0.01

scaled_housing_data_plus_bias = housing_data_plus_bias
scaled_housing_data_plus_bias[:, 1:] = scaler.fit_transform(housing_data_plus_bias[:, 1:])

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer() 
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()

Epoch 0 MSE = 15.942357
Epoch 100 MSE = 0.5315806
Epoch 200 MSE = 0.52497107
Epoch 300 MSE = 0.5244025
Epoch 400 MSE = 0.52433175
Epoch 500 MSE = 0.5243224
Epoch 600 MSE = 0.52432114
Epoch 700 MSE = 0.524321
Epoch 800 MSE = 0.524321
Epoch 900 MSE = 0.52432096
Epoch 1000 MSE = 0.52432096
CPU times: user 544 ms, sys: 35.6 ms, total: 580 ms
Wall time: 470 ms
time: 474 ms


## Feeding Data to the Training Algorithm

In [21]:
housing_target = housing.target.reshape((-1, 1))
def fetch_batch(epoch, batch_index, batch_size):
    np.random.seed(epoch * n_batches + batch_index)
    idx = np.random.randint(m, size=batch_size)
    X_batch = scaled_housing_data_plus_bias[idx]
    y_batch = housing_target[idx]
    
    return X_batch, y_batch

time: 848 µs


In [22]:
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

batch_size = 100
n_batches = int(np.ceil(m / batch_size))

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer() 
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            _, mse_val = sess.run([training_op, mse], feed_dict={X: X_batch, y: y_batch})
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse_val)

    best_theta = theta.eval()

Epoch 0 MSE = 3.9309347
Epoch 100 MSE = 1.0551947
Epoch 200 MSE = 0.6202288
Epoch 300 MSE = 0.6609783
Epoch 400 MSE = 0.48806977
Epoch 500 MSE = 0.515194
Epoch 600 MSE = 0.41066602
Epoch 700 MSE = 0.49822804
Epoch 800 MSE = 0.73221624
Epoch 900 MSE = 0.6849343
Epoch 1000 MSE = 0.44657856
time: 1min 3s


#### TODO: investigate why the batch algorithm is so much slower

## Saving and Restoring Models

### Saving

Using the Gradient Descent Optimizer version and saving sessions per 100 epochs

In [23]:
if not os.path.exists('chp9/models/'):
    if not os.path.exists('chp9/'):
        os.mkdir('chp9')
    os.mkdir('chp9/models')

time: 703 µs


In [24]:
if not os.path.exists('chp9/models/lr_gdo'):
    os.mkdir('chp9/models/lr_gdo')

time: 68.9 ms


In [25]:
%%time
n_epochs = 1001
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        sess.run(training_op)
        if epoch % 200 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
            saver.save(sess, f'chp9/models/lr_gdo/lr_gdo_epoch{epoch}.ckpt')

    best_theta = theta.eval()

Epoch 0 MSE = 3.2112293
Epoch 200 MSE = 0.5408854
Epoch 400 MSE = 0.53283066
Epoch 600 MSE = 0.5292797
Epoch 800 MSE = 0.5272719
Epoch 1000 MSE = 0.52611387
Instructions for updating:
Use standard file APIs to delete files with this prefix.
CPU times: user 606 ms, sys: 106 ms, total: 712 ms
Wall time: 618 ms
time: 703 ms


### Loading

In [26]:
with tf.Session() as sess:
    saver.restore(sess, 'chp9/models/lr_gdo/lr_gdo_epoch200.ckpt')
    best_theta = theta.eval()
    print(best_theta)
    print("MSE =", mse.eval())
    
    saver.restore(sess, 'chp9/models/lr_gdo/lr_gdo_epoch1000.ckpt')
    best_theta = theta.eval()
    print(best_theta)
    print("MSE =", mse.eval())

Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from chp9/models/lr_gdo/lr_gdo_epoch200.ckpt
[[ 2.0460427 ]
 [ 0.85075015]
 [ 0.18975708]
 [-0.21466142]
 [ 0.23740135]
 [ 0.03893556]
 [-0.05524277]
 [-0.57464874]
 [-0.5447832 ]]
MSE = 0.5408854
INFO:tensorflow:Restoring parameters from chp9/models/lr_gdo/lr_gdo_epoch1000.ckpt
[[ 2.0685523e+00]
 [ 8.4960401e-01]
 [ 1.3333330e-01]
 [-2.8302097e-01]
 [ 3.1125107e-01]
 [ 5.4262805e-04]
 [-4.0985990e-02]
 [-7.7482873e-01]
 [-7.4682987e-01]]
MSE = 0.52611387
time: 56.6 ms


## Visualizing the Graph and Training curves Using TensorBoard

In [27]:
now = datetime.utcnow().strftime("%Y%m%d%%M%S")
root_logdir = 'chp9/models/lr_gdo/tf_logs'
log_dir = f'{root_logdir}/run-{now}'

time: 258 ms


In [28]:
%%time
n_epochs = 1001
learning_rate = 0.01

now = datetime.utcnow().strftime("%Y%m%d%%M%S")
root_logdir = 'chp9/models/lr_gdo/tf_logs'
logdir = f'{root_logdir}/run-{now}'

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

# TensorBoard part
mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

init = tf.global_variables_initializer()
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        sess.run(training_op)
        if epoch % 200 == 0:
            #print("Epoch", epoch, "MSE =", mse.eval())
            saver.save(sess, f'chp9/models/lr_gdo/lr_gdo_epoch{epoch}.ckpt')
        if epoch % 20 == 0:
            summary_str = mse_summary.eval()
            file_writer.add_summary(summary_str, epoch)
    best_theta = theta.eval()
file_writer.close()

CPU times: user 754 ms, sys: 159 ms, total: 914 ms
Wall time: 840 ms
time: 1.08 s


Now run on terminal, in the directory of this notebook:
```shell
$ tensorboard --logdir chp9/models/lr_gdo/tf_logs/
```