# Creating Your First Graph and Running It in a Session

First graph and four different ways of execution. Also learning how to set a default session (last execution).

In [1]:
%load_ext autotime

In [2]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler

time: 1.44 s


In [3]:
# Creating the graph
x = tf.Variable(3, name='x')
y = tf.Variable(4, name='y')
f = x*x*y + y + 2

time: 32.3 ms


In [4]:
sess = tf.Session()
sess.run(x.initializer)
sess.run(y.initializer)
result = sess.run(f)
print(result)
sess.close()

42
time: 65.6 ms


In [5]:
result = 0
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()
print(result)

42
time: 30.6 ms


In [6]:
result = 0
init = tf.global_variables_initializer()
with tf.Session() as sess:
    init.run()
    result = f.eval()
print(result)

42
time: 89.2 ms


In [7]:
result = 0
init = tf.global_variables_initializer()
# This will set 'sess' as my default session
sess = tf.InteractiveSession()
init.run()
result = f.eval()
print(result)
sess.close()

42
time: 27 ms


# Managing Graphs

In [8]:
x1 = tf.Variable(1)
x1.graph is tf.get_default_graph()

True

time: 63.7 ms


In [9]:
x2 = tf.Variable(2)
x2.graph is tf.get_default_graph()

True

time: 73.7 ms


In [10]:
graph = tf.Graph()
# Setting 'graph' as the default graph
with graph.as_default():
    x2 = tf.Variable(2)
    print(x2.graph is graph)
    print(x2.graph is tf.get_default_graph())
print(x2.graph is graph)
print(x2.graph is tf.get_default_graph())

True
True
True
False
time: 34.2 ms


# Lifecycle of a Node Value

Running a graph causes TensorFlow to run all the dependencies associated with the graph (w and x on the example). After the run, the dependencies will be dropped. If you have two graphs that have the same dependencies there is the option to run them both in one only graph.

In [11]:
%%time

w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

with tf.Session() as sess:
    print(y.eval())
    print(z.eval())

10
15
CPU times: user 33.1 ms, sys: 5.37 ms, total: 38.4 ms
Wall time: 35.7 ms
time: 105 ms


In [12]:
%%time

w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

with tf.Session() as sess:
    y_val, z_val = sess.run([y, z])
    print(y_val)
    print(z_val)

10
15
CPU times: user 20.4 ms, sys: 3.69 ms, total: 24.1 ms
Wall time: 23.8 ms
time: 40.6 ms


### Replicating for $10 ^ 4$ runs

In [13]:
%%time

w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

with tf.Session() as sess:
    for i in range(10**4):
        y_val = y.eval()
        z_val = z.eval()

CPU times: user 2.71 s, sys: 140 ms, total: 2.85 s
Wall time: 2.05 s
time: 2.06 s


In [14]:
%%time

w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

with tf.Session() as sess:
    for i in range(10**4):
        y_val, z_val = sess.run([y, z])

CPU times: user 1.64 s, sys: 60.7 ms, total: 1.7 s
Wall time: 1.29 s
time: 1.29 s


# Linear Regression with TensorFlow
## Adjusting linear regression by Normal Equation: 
$ \theta = (X^T X)^{-1} X^T y$

In [15]:
housing = fetch_california_housing()
m, n = housing.data.shape

housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")

XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

with tf.Session() as sess:
    theta_value = theta.eval()
    
    print("MSE =", mse.eval())

MSE = 0.5243257
time: 45 ms


## Implementing Gradient Descent

Adjusting linear regression by gradient descent: $ \theta^{i+1} = \theta - \eta \nabla_{\theta}MSE(\theta) $

### Manualy computing the gradients
$ MSE = \frac{1}{M}\sum_{n=1}^{M} (\hat{y_{n}} - y_{n})^2$

$ \hat{y} = X \theta$

$\nabla_{\theta}MSE(\theta) = \frac{2}{M} X^T (\hat{y} - y)$

In [16]:
scaler = StandardScaler()

time: 101 ms


In [17]:
%%time
n_epochs = 1001
learning_rate = 0.01

scaled_housing_data_plus_bias = housing_data_plus_bias
scaled_housing_data_plus_bias[:, 1:] = scaler.fit_transform(housing_data_plus_bias[:, 1:])

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer() 
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()

Epoch 0 MSE = 10.428479
Epoch 100 MSE = 0.80934376
Epoch 200 MSE = 0.64072967
Epoch 300 MSE = 0.60800713
Epoch 400 MSE = 0.5857024
Epoch 500 MSE = 0.5694654
Epoch 600 MSE = 0.55760884
Epoch 700 MSE = 0.54893565
Epoch 800 MSE = 0.542579
Epoch 900 MSE = 0.5379098
Epoch 1000 MSE = 0.53447235
CPU times: user 409 ms, sys: 17.5 ms, total: 426 ms
Wall time: 330 ms
time: 386 ms


### Using autodiff to compute the grandients

In [18]:
%%time
n_epochs = 1001
learning_rate = 0.01

scaled_housing_data_plus_bias = housing_data_plus_bias
scaled_housing_data_plus_bias[:, 1:] = scaler.fit_transform(housing_data_plus_bias[:, 1:])

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer() 
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()

Epoch 0 MSE = 4.6016593
Epoch 100 MSE = 0.7099825
Epoch 200 MSE = 0.6273729
Epoch 300 MSE = 0.6005379
Epoch 400 MSE = 0.5813292
Epoch 500 MSE = 0.56714463
Epoch 600 MSE = 0.5566287
Epoch 700 MSE = 0.5488074
Epoch 800 MSE = 0.5429681
Epoch 900 MSE = 0.5385923
Epoch 1000 MSE = 0.5352987
CPU times: user 428 ms, sys: 29.3 ms, total: 457 ms
Wall time: 373 ms
time: 377 ms


### Using an optimizer

Gradient Descent Optimizer

In [19]:
%%time
n_epochs = 1001
learning_rate = 0.01

scaled_housing_data_plus_bias = housing_data_plus_bias
scaled_housing_data_plus_bias[:, 1:] = scaler.fit_transform(housing_data_plus_bias[:, 1:])

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer() 
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()

Epoch 0 MSE = 12.429156
Epoch 100 MSE = 0.91886514
Epoch 200 MSE = 0.6724873
Epoch 300 MSE = 0.62898344
Epoch 400 MSE = 0.6006144
Epoch 500 MSE = 0.5801139
Epoch 600 MSE = 0.565204
Epoch 700 MSE = 0.5543439
Epoch 800 MSE = 0.5464209
Epoch 900 MSE = 0.54063207
Epoch 1000 MSE = 0.536395
CPU times: user 451 ms, sys: 0 ns, total: 451 ms
Wall time: 362 ms
time: 365 ms


Momentum Optimizer

In [20]:
%%time
n_epochs = 1001
learning_rate = 0.01

scaled_housing_data_plus_bias = housing_data_plus_bias
scaled_housing_data_plus_bias[:, 1:] = scaler.fit_transform(housing_data_plus_bias[:, 1:])

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer() 
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()

Epoch 0 MSE = 4.2856407
Epoch 100 MSE = 0.5373106
Epoch 200 MSE = 0.5248914
Epoch 300 MSE = 0.5243766
Epoch 400 MSE = 0.52432775
Epoch 500 MSE = 0.5243216
Epoch 600 MSE = 0.5243211
Epoch 700 MSE = 0.524321
Epoch 800 MSE = 0.52432084
Epoch 900 MSE = 0.5243208
Epoch 1000 MSE = 0.5243209
CPU times: user 401 ms, sys: 29.2 ms, total: 430 ms
Wall time: 322 ms
time: 324 ms


## Feeding Data to the Training Algorithm

In [21]:
def fetch_batch(epoch, batch_index, batch_size):
    np.random.seed(epoch * n_batches + batch_index)
    idx = np.random.randint(m, size=batch_size)
    X_batch = scaled_housing_data_plus_bias[idx]
    y_batch = housing.target.reshape((-1, 1))[idx]
    
    return X_batch, y_batch

time: 760 µs


In [22]:
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

batch_size = 100
n_batches = int(np.ceil(m / batch_size))

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer() 
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            _, mse_val = sess.run([training_op, mse], feed_dict={X: X_batch, y: y_batch})
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse_val)

    best_theta = theta.eval()

Epoch 0 MSE = 7.5358124
Epoch 100 MSE = 0.78660613
Epoch 200 MSE = 0.49880895
Epoch 300 MSE = 0.585263
Epoch 400 MSE = 0.39971
Epoch 500 MSE = 0.43882915
Epoch 600 MSE = 0.37123835
Epoch 700 MSE = 0.4754081
Epoch 800 MSE = 0.7102132
Epoch 900 MSE = 0.6552994
Epoch 1000 MSE = 0.4419458
time: 1min 7s
