# Creating Your First Graph and Running It in a Session

First graph and four different ways of execution. Also learning how to set a default session (last execution).

In [1]:
%load_ext autotime

In [2]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler

time: 42.2 s


In [3]:
# Creating the graph
x = tf.Variable(3, name='x')
y = tf.Variable(4, name='y')
f = x*x*y + y + 2

Instructions for updating:
Colocations handled automatically by placer.
time: 794 ms


In [4]:
sess = tf.Session()
sess.run(x.initializer)
sess.run(y.initializer)
result = sess.run(f)
print(result)
sess.close()

42
time: 1.89 s


In [5]:
result = 0
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()
print(result)

42
time: 11.2 ms


In [6]:
result = 0
init = tf.global_variables_initializer()
with tf.Session() as sess:
    init.run()
    result = f.eval()
print(result)

42
time: 138 ms


In [7]:
result = 0
init = tf.global_variables_initializer()
# This will set 'sess' as my default session
sess = tf.InteractiveSession()
init.run()
result = f.eval()
print(result)
sess.close()

42
time: 13.3 ms


# Managing Graphs

In [8]:
x1 = tf.Variable(1)
x1.graph is tf.get_default_graph()

True

time: 32.5 ms


In [9]:
x2 = tf.Variable(2)
x2.graph is tf.get_default_graph()

True

time: 21 ms


In [10]:
graph = tf.Graph()
# Setting 'graph' as the default graph
with graph.as_default():
    x2 = tf.Variable(2)
    print(x2.graph is graph)
    print(x2.graph is tf.get_default_graph())
print(x2.graph is graph)
print(x2.graph is tf.get_default_graph())

True
True
True
False
time: 23.2 ms


# Lifecycle of a Node Value

Running a graph causes TensorFlow to run all the dependencies associated with the graph (w and x on the example). After the run, the dependencies will be dropped. If you have two graphs that have the same dependencies there is the option to run them both in one only graph.

In [11]:
%%time

w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

with tf.Session() as sess:
    print(y.eval())
    print(z.eval())

10
15
CPU times: user 13.8 ms, sys: 4.89 ms, total: 18.7 ms
Wall time: 53.9 ms
time: 59.2 ms


In [12]:
%%time

w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

with tf.Session() as sess:
    y_val, z_val = sess.run([y, z])
    print(y_val)
    print(z_val)

10
15
CPU times: user 21.6 ms, sys: 571 µs, total: 22.2 ms
Wall time: 19.9 ms
time: 24.7 ms


### Replicating for $10 ^ 4$ runs

In [13]:
%%time

w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

with tf.Session() as sess:
    for i in range(10**4):
        y_val = y.eval()
        z_val = z.eval()

CPU times: user 2.4 s, sys: 145 ms, total: 2.55 s
Wall time: 1.64 s
time: 1.65 s


In [14]:
%%time

w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

with tf.Session() as sess:
    for i in range(10**4):
        y_val, z_val = sess.run([y, z])

CPU times: user 1.39 s, sys: 59.1 ms, total: 1.45 s
Wall time: 998 ms
time: 999 ms


# Linear Regression with TensorFlow
## Adjusting linear regression by Normal Equation: 
$ \theta = (X^T X)^{-1} X^T y$

In [15]:
housing = fetch_california_housing()
m, n = housing.data.shape

housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")

XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

with tf.Session() as sess:
    theta_value = theta.eval()
    
    print("MSE =", mse.eval())

MSE = 0.52434915
time: 568 ms


## Implementing Gradient Descent

Adjusting linear regression by gradient descent: $ \theta^{i+1} = \theta - \eta \nabla_{\theta}MSE(\theta) $

### Manualy computing the gradients
$ MSE = \frac{1}{M}\sum_{n=1}^{M} (\hat{y_{n}} - y_{n})^2$

$ \hat{y} = X \theta$

$\nabla_{\theta}MSE(\theta) = \frac{2}{M} X^T (\hat{y} - y)$

In [16]:
scaler = StandardScaler()

time: 554 µs


In [17]:
%%time
n_epochs = 1001
learning_rate = 0.01

scaled_housing_data_plus_bias = housing_data_plus_bias
scaled_housing_data_plus_bias[:, 1:] = scaler.fit_transform(housing_data_plus_bias[:, 1:])

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer() 
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()

Epoch 0 MSE = 6.760354
Epoch 100 MSE = 0.8243887
Epoch 200 MSE = 0.7025383
Epoch 300 MSE = 0.65508187
Epoch 400 MSE = 0.6211762
Epoch 500 MSE = 0.59631604
Epoch 600 MSE = 0.5780295
Epoch 700 MSE = 0.5645427
Epoch 800 MSE = 0.55456746
Epoch 900 MSE = 0.5471665
Epoch 1000 MSE = 0.54165655
CPU times: user 393 ms, sys: 16.5 ms, total: 410 ms
Wall time: 489 ms
time: 505 ms


### Using autodiff to compute the grandients

In [18]:
%%time
n_epochs = 1001
learning_rate = 0.01

scaled_housing_data_plus_bias = housing_data_plus_bias
scaled_housing_data_plus_bias[:, 1:] = scaler.fit_transform(housing_data_plus_bias[:, 1:])

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer() 
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()

Epoch 0 MSE = 9.650943
Epoch 100 MSE = 0.7003664
Epoch 200 MSE = 0.55874896
Epoch 300 MSE = 0.5493404
Epoch 400 MSE = 0.54434675
Epoch 500 MSE = 0.54048294
Epoch 600 MSE = 0.5374134
Epoch 700 MSE = 0.53496027
Epoch 800 MSE = 0.53299165
Epoch 900 MSE = 0.5314053
Epoch 1000 MSE = 0.5301227
CPU times: user 464 ms, sys: 6.17 ms, total: 470 ms
Wall time: 560 ms
time: 573 ms


### Using an optimizer

Gradient Descent Optimizer

In [19]:
%%time
n_epochs = 1001
learning_rate = 0.01

scaled_housing_data_plus_bias = housing_data_plus_bias
scaled_housing_data_plus_bias[:, 1:] = scaler.fit_transform(housing_data_plus_bias[:, 1:])

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer() 
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()

Epoch 0 MSE = 6.652069
Epoch 100 MSE = 0.9205546
Epoch 200 MSE = 0.74586886
Epoch 300 MSE = 0.68416363
Epoch 400 MSE = 0.6406676
Epoch 500 MSE = 0.6091436
Epoch 600 MSE = 0.58625585
Epoch 700 MSE = 0.56962126
Epoch 800 MSE = 0.5575181
Epoch 900 MSE = 0.548701
Epoch 1000 MSE = 0.5422687
CPU times: user 372 ms, sys: 12.9 ms, total: 385 ms
Wall time: 436 ms
time: 439 ms


Momentum Optimizer

In [20]:
%%time
n_epochs = 1001
learning_rate = 0.01

scaled_housing_data_plus_bias = housing_data_plus_bias
scaled_housing_data_plus_bias[:, 1:] = scaler.fit_transform(housing_data_plus_bias[:, 1:])

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer() 
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()

Epoch 0 MSE = 9.580023
Epoch 100 MSE = 0.5425509
Epoch 200 MSE = 0.5253816
Epoch 300 MSE = 0.5244385
Epoch 400 MSE = 0.5243362
Epoch 500 MSE = 0.52432305
Epoch 600 MSE = 0.52432126
Epoch 700 MSE = 0.524321
Epoch 800 MSE = 0.524321
Epoch 900 MSE = 0.52432096
Epoch 1000 MSE = 0.52432096
CPU times: user 495 ms, sys: 26.2 ms, total: 521 ms
Wall time: 433 ms
time: 452 ms


## Feeding Data to the Training Algorithm

In [21]:
def fetch_batch(epoch, batch_index, batch_size):
    np.random.seed(epoch * n_batches + batch_index)
    idx = np.random.randint(m, size=batch_size)
    X_batch = scaled_housing_data_plus_bias[idx]
    y_batch = housing.target.reshape((-1, 1))[idx]
    
    return X_batch, y_batch

time: 5.23 ms


In [None]:
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

batch_size = 100
n_batches = int(np.ceil(m / batch_size))

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer() 
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            _, mse_val = sess.run([training_op, mse], feed_dict={X: X_batch, y: y_batch})
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse_val)

    best_theta = theta.eval()

Epoch 0 MSE = 6.3331647
Epoch 100 MSE = 0.7154369
Epoch 200 MSE = 0.49151307
Epoch 300 MSE = 0.57718164
Epoch 400 MSE = 0.38209638
