# Creating Your First Graph and Running It in a Session

This section builds a first graph and runs it in four different ways. The last way also shows how to set a default session.

In [None]:
%load_ext autotime

In [None]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
import os
from datetime import datetime

In [None]:
# Build the graph: two integer variables and the node f = x*x*y + y + 2.
# Nothing is computed here; the following cells evaluate f inside a session.
x = tf.Variable(3, name='x')
y = tf.Variable(4, name='y')
f = (x * x * y) + y + 2

In [None]:
# Way 1: manage the session by hand and pass every op explicitly to sess.run().
sess = tf.Session()
try:
    sess.run(x.initializer)
    sess.run(y.initializer)
    result = sess.run(f)
    print(result)
finally:
    # Close the session even if one of the runs above raises, so it is not leaked.
    sess.close()

In [None]:
# Way 2: open the session in a `with` block and call .run() / .eval() on the
# ops themselves instead of passing them to sess.run().
result = 0
with tf.Session() as sess:
    for variable in (x, y):
        variable.initializer.run()
    result = f.eval()
print(result)

In [None]:
# Way 3: a single op that initializes all variables replaces the
# per-variable initializer calls.
init = tf.global_variables_initializer()
result = 0
with tf.Session() as sess:
    init.run()
    result = f.eval()
print(result)

In [None]:
# Way 4: an InteractiveSession sets itself as the default session when it is
# created, so no `with` block is needed -- but it has to be closed by hand.
init = tf.global_variables_initializer()
sess = tf.InteractiveSession()
try:
    init.run()
    result = f.eval()
    print(result)
finally:
    # Always release the session, even if initialization or evaluation fails.
    sess.close()

# Managing Graphs

In [None]:
# Check whether a variable created without naming a graph lands in the default graph.
x1 = tf.Variable(1)
tf.get_default_graph() is x1.graph

In [None]:
# Same check for a second variable.
x2 = tf.Variable(2)
tf.get_default_graph() is x2.graph

In [None]:
graph = tf.Graph()
# Inside the `with` block, `graph` is used as the default graph.
with graph.as_default():
    x2 = tf.Variable(2)
    for candidate in (graph, tf.get_default_graph()):
        print(x2.graph is candidate)
# Repeat both checks after leaving the block to compare the results.
for candidate in (graph, tf.get_default_graph()):
    print(x2.graph is candidate)

# Lifecycle of a Node Value

Evaluating a node causes TensorFlow to first evaluate all the nodes it depends on (`w` and `x` in the example). After the run, the values of those dependencies are dropped, so evaluating `y` and `z` in two separate runs computes `w` and `x` twice. If two nodes share the same dependencies, you can evaluate both of them in a single graph run instead.

In [None]:
%%time

w = tf.constant(3)
x = w + 2
y, z = x + 5, x * 3

with tf.Session() as sess:
    # One eval() call per node, i.e. two separate graph runs.
    for node in (y, z):
        print(node.eval())

In [None]:
%%time

w = tf.constant(3)
x = w + 2
y, z = x + 5, x * 3

with tf.Session() as sess:
    # Fetch both nodes in a single graph run.
    fetches = [y, z]
    y_val, z_val = sess.run(fetches)
    print(y_val, z_val, sep="\n")

### Replicating for $10 ^ 4$ runs

In [None]:
%%time

w = tf.constant(3)
x = w + 2
y, z = x + 5, x * 3

with tf.Session() as sess:
    # Timing baseline: 10^4 iterations with two separate evaluations each.
    for run_idx in range(10**4):
        y_val = y.eval()
        z_val = z.eval()

In [None]:
%%time

w = tf.constant(3)
x = w + 2
y, z = x + 5, x * 3

with tf.Session() as sess:
    # Timing comparison: 10^4 iterations with one combined run each.
    fetches = [y, z]
    for run_idx in range(10**4):
        y_val, z_val = sess.run(fetches)

# Linear Regression with TensorFlow
## Fitting linear regression with the Normal Equation:
$ \theta = (X^T X)^{-1} X^T y$

In [None]:
# Load the dataset; m is the number of rows and n the number of columns of the data.
housing = fetch_california_housing()
m, n = housing.data.shape

# Prepend a column of ones so that the first entry of theta acts as the bias term.
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")

# Normal Equation: theta = (X^T X)^-1 X^T y
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

with tf.Session() as sess:
    # Fetch theta and the MSE in one run: two separate eval() calls would
    # solve the Normal Equation twice, because node values are dropped
    # between runs.
    theta_value, mse_value = sess.run([theta, mse])

    print("MSE =", mse_value)

## Implementing Gradient Descent

Fitting linear regression by gradient descent: $ \theta^{(i+1)} = \theta^{(i)} - \eta \nabla_{\theta}MSE(\theta^{(i)}) $

### Manually computing the gradients
$ MSE = \frac{1}{M}\sum_{n=1}^{M} (\hat{y_{n}} - y_{n})^2$

$ \hat{y} = X \theta$

$\nabla_{\theta}MSE(\theta) = \frac{2}{M} X^T (\hat{y} - y)$

In [None]:
# Feature scaler shared by the training cells below, which call fit_transform() on it.
scaler = StandardScaler()

In [None]:
%%time
n_epochs = 1001
learning_rate = 0.01

# Scale every column except the leading bias column of ones. Work on a copy:
# a plain assignment would only alias housing_data_plus_bias, and the in-place
# write below would then overwrite the unscaled data.
scaled_housing_data_plus_bias = housing_data_plus_bias.copy()
scaled_housing_data_plus_bias[:, 1:] = scaler.fit_transform(housing_data_plus_bias[:, 1:])

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# Closed-form gradient of the MSE: (2/m) * X^T (y_pred - y).
gradients = 2/m * tf.matmul(tf.transpose(X), error)
# One gradient-descent step: theta <- theta - learning_rate * gradients.
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        # Report the MSE before the update, once every 100 epochs.
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()

### Using autodiff to compute the gradients

In [None]:
%%time
n_epochs = 1001
learning_rate = 0.01

# Scale every column except the leading bias column of ones. Work on a copy:
# a plain assignment would only alias housing_data_plus_bias, and the in-place
# write below would then overwrite the unscaled data.
scaled_housing_data_plus_bias = housing_data_plus_bias.copy()
scaled_housing_data_plus_bias[:, 1:] = scaler.fit_transform(housing_data_plus_bias[:, 1:])

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# Let TensorFlow derive d(mse)/d(theta) instead of writing the formula by hand.
gradients = tf.gradients(mse, [theta])[0]
# One gradient-descent step: theta <- theta - learning_rate * gradients.
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        # Report the MSE before the update, once every 100 epochs.
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()

### Using an optimizer

Gradient Descent Optimizer

In [None]:
%%time
n_epochs = 1001
learning_rate = 0.01

# Scale every column except the leading bias column of ones. Work on a copy:
# a plain assignment would only alias housing_data_plus_bias, and the in-place
# write below would then overwrite the unscaled data.
scaled_housing_data_plus_bias = housing_data_plus_bias.copy()
scaled_housing_data_plus_bias[:, 1:] = scaler.fit_transform(housing_data_plus_bias[:, 1:])

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# The optimizer builds the update op; no explicit gradient or assign is needed.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        # Report the MSE before the update, once every 100 epochs.
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()

Momentum Optimizer

In [None]:
%%time
n_epochs = 1001
learning_rate = 0.01

# Scale every column except the leading bias column of ones. Work on a copy:
# a plain assignment would only alias housing_data_plus_bias, and the in-place
# write below would then overwrite the unscaled data.
scaled_housing_data_plus_bias = housing_data_plus_bias.copy()
scaled_housing_data_plus_bias[:, 1:] = scaler.fit_transform(housing_data_plus_bias[:, 1:])

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# Same setup as the gradient-descent optimizer cell, with a momentum term of 0.9.
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        # Report the MSE before the update, once every 100 epochs.
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()

## Feeding Data to the Training Algorithm

In [None]:
# Targets reshaped to a single column (one row per sample); fetch_batch indexes it by row.
housing_target = housing.target.reshape((-1, 1))
def fetch_batch(epoch, batch_index, batch_size):
    """Return one random mini-batch of the scaled housing data.

    Row indices are drawn with replacement from ``range(m)`` by a generator
    seeded with ``epoch * n_batches + batch_index``, so the same
    (epoch, batch_index) pair always selects the same rows.

    Reads the notebook globals ``m``, ``n_batches``,
    ``scaled_housing_data_plus_bias`` and ``housing_target`` at call time.

    Args:
        epoch: index of the current epoch.
        batch_index: index of the batch within the epoch.
        batch_size: number of rows to draw.

    Returns:
        Tuple ``(X_batch, y_batch)`` with the selected rows of the features
        and of the targets.
    """
    # A private RandomState draws the same indices as seeding the global
    # generator would, but does not reset np.random for the rest of the notebook.
    rng = np.random.RandomState(epoch * n_batches + batch_index)
    idx = rng.randint(m, size=batch_size)
    X_batch = scaled_housing_data_plus_bias[idx]
    y_batch = housing_target[idx]

    return X_batch, y_batch

In [None]:
# Placeholders receive one mini-batch per training step through feed_dict;
# the None dimension leaves the number of rows open.
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

batch_size = 100
n_batches = int(np.ceil(m / batch_size))

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# The MSE is averaged over the rows actually fed, so its gradient must be
# normalized by that row count rather than by the dataset size m; dividing
# by m would shrink every step by a factor of batch_size / m.
rows_in_batch = tf.cast(tf.shape(X)[0], tf.float32)
gradients = 2 / rows_in_batch * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

# NOTE: n_epochs and learning_rate are not set in this cell; they keep the
# values assigned by the preceding training cell.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            _, mse_val = sess.run([training_op, mse], feed_dict={X: X_batch, y: y_batch})
        # mse_val is the MSE of the last mini-batch of the epoch.
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse_val)

    best_theta = theta.eval()

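#### TODO: investigate why the batch algorithm is so much slower

Likely cause (to be confirmed): each epoch now performs `n_batches = ceil(m / batch_size)` separate `sess.run` calls, each feeding a NumPy batch through `feed_dict`, whereas the previous versions perform a single run per epoch.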

## Saving and Restoring Models

### Saving

Using the Gradient Descent Optimizer version and saving a checkpoint every 200 epochs

In [None]:
# Create the output directory tree. makedirs builds missing parent
# directories, and exist_ok=True makes the cell safe to re-run.
os.makedirs('chp9/models', exist_ok=True)

In [None]:
# Directory for the checkpoints of the gradient-descent-optimizer model.
# makedirs also creates the parent directories if they are missing, so this
# cell does not depend on the previous one having run.
os.makedirs('chp9/models/lr_gdo', exist_ok=True)

In [None]:
%%time
n_epochs = 1001
learning_rate = 0.01

# Linear model on the scaled data prepared by the earlier cells.
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        sess.run(training_op)
        if epoch % 200 != 0:
            continue
        # Every 200th epoch: report the MSE and write a checkpoint.
        print("Epoch", epoch, "MSE =", mse.eval())
        saver.save(sess, f'chp9/models/lr_gdo/lr_gdo_epoch{epoch}.ckpt')

    best_theta = theta.eval()

### Loading

In [None]:
with tf.Session() as sess:
    # Restore two checkpoints in turn and print theta and the MSE for each.
    # best_theta ends up holding the values from the last checkpoint restored.
    for checkpoint_path in ('chp9/models/lr_gdo/lr_gdo_epoch200.ckpt',
                            'chp9/models/lr_gdo/lr_gdo_epoch1000.ckpt'):
        saver.restore(sess, checkpoint_path)
        best_theta = theta.eval()
        print(best_theta)
        print("MSE =", mse.eval())

## Visualizing the Graph and Training curves Using TensorBoard

In [None]:
# UTC timestamp in YYYYmmddHHMMSS form, used to give each run its own log
# sub-directory. The hour needs %H: "%%" would only emit a literal percent sign.
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = 'chp9/models/lr_gdo/tf_logs'
log_dir = f'{root_logdir}/run-{now}'

In [None]:
%%time
n_epochs = 1001
learning_rate = 0.01

# UTC timestamp in YYYYmmddHHMMSS form, so each run logs to its own
# sub-directory. The hour needs %H: "%%" would only emit a literal percent sign.
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = 'chp9/models/lr_gdo/tf_logs'
logdir = f'{root_logdir}/run-{now}'

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

# TensorBoard part: a scalar summary of the MSE and a writer for this run's log directory.
mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

init = tf.global_variables_initializer()
saver = tf.train.Saver()
try:
    with tf.Session() as sess:
        sess.run(init)

        for epoch in range(n_epochs):
            sess.run(training_op)
            # Checkpoint every 200 epochs.
            if epoch % 200 == 0:
                saver.save(sess, f'chp9/models/lr_gdo/lr_gdo_epoch{epoch}.ckpt')
            # Log the MSE every 20 epochs, using the epoch as the step.
            if epoch % 20 == 0:
                summary_str = mse_summary.eval()
                file_writer.add_summary(summary_str, epoch)
        best_theta = theta.eval()
finally:
    # Close the writer even if training fails part-way.
    file_writer.close()

Now run in a terminal, from the directory of this notebook:
```shell
$ tensorboard --logdir chp9/models/lr_gdo/tf_logs/
```