# Creating your 1st graph and running it in a session

In [1]:
import tensorflow as tf

# Construction phase only: these lines add two variables and an expression
# to the default graph. Nothing is evaluated until f is run in a session.
x = tf.Variable(3, name="x")
y = tf.Variable(4, name="y")
f = x*x*y + y + 2

In [2]:
# Execution phase: open a session, run each variable's initializer, then
# evaluate f. The session is closed explicitly once the result is printed.
session = tf.Session()
session.run(x.initializer)
session.run(y.initializer)
result = session.run(f)
print(result)
session.close()

42


In [3]:
# To avoid repeating session.run(): inside the `with` block the session is
# the default session, so .run() / .eval() can be called on the nodes
# directly. The session is closed automatically when the block exits.
with tf.Session() as session:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()
result

42

In [4]:
# Instead of running every variable's initializer separately, create a single
# node that initializes all global variables. Creating the node does not
# initialize anything by itself: the variables are initialized when
# init.run() executes inside the session.
init = tf.global_variables_initializer()
with tf.Session() as session:
    init.run()
    result = f.eval()
result

42

In [5]:
# An InteractiveSession sets itself as the default session when it is created,
# so .run() / .eval() work without a `with` block. It has to be closed
# manually when it is no longer needed.
session = tf.InteractiveSession()
init.run()
result = f.eval()
print(result)
session.close()

42


# Managing graphs

In [6]:
# A node created without further setup belongs to the default graph.
x1 = tf.Variable(3)
with tf.Session() as session:
    print(x1.graph is tf.get_default_graph())

True


In [7]:
# To keep nodes in a separate graph, create a Graph and make it the default
# graph for the duration of a `with` block only.
graph = tf.Graph()
with graph.as_default():
    x2 = tf.Variable(1)
    print(x2.graph is graph)
# Outside the block the previous default graph is active again.
print(x2.graph is tf.get_default_graph())

True
False


# Lifecycle of a Node Value

In [8]:
# y and z both depend on x, which depends on w. Each eval() call below is a
# separate graph run, so w and x are evaluated twice (once for y, once for z).
w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

with tf.Session() as session:
    print(y.eval())
    print(z.eval())

10
15


In [9]:
# Evaluate both expressions in a single graph run so that w and x are
# computed only once.
with tf.Session() as session:
    y_val, z_val = session.run([y, z])
    print(y_val)
    print(z_val)

10
15


# Linear Regression with TensorFlow

In [10]:
import numpy as np
from sklearn.datasets import fetch_california_housing

# Load the California housing data; m and n are the row and column counts of
# the feature matrix.
housing = fetch_california_housing()
m, n = housing.data.shape
# Prepend a column of ones so the first component of theta acts as the bias term.
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
# reshape(-1, 1) arranges the targets as a single-column matrix.
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
XT = tf.transpose(X)
# Normal Equation: theta = (X^T . X)^-1 . X^T . y
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

with tf.Session() as session:
    thetha_value = theta.eval()
# One coefficient per feature plus the bias term.
print(thetha_value.shape)

(9, 1)


# Implementing Gradient Descent

## Manually computing the gradients

In [11]:
def reset_graph(seed=42):
    """Start over with an empty default graph and repeatable randomness.

    Clears the default TensorFlow graph, then seeds NumPy's global random
    generator and TensorFlow's graph-level random seed with ``seed``.
    The TensorFlow seed is set after the reset so that it applies to the
    new default graph.
    """
    tf.reset_default_graph()
    np.random.seed(seed)
    tf.set_random_seed(seed)

from sklearn.preprocessing import StandardScaler
# Standardize the features with StandardScaler, then prepend the bias
# column of ones to the scaled matrix.
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]

In [12]:
# Batch gradient descent on the full training set, with the gradient of the
# MSE written out by hand.
reset_graph()
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
# Random starting weights, drawn uniformly between -1.0 and 1.0.
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# Gradient of the MSE with respect to theta: (2/m) * X^T . (X . theta - y)
gradients = 2/m * tf.matmul(tf.transpose(X), error)
# One gradient descent step: theta <- theta - learning_rate * gradients
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as session:
    session.run(init)
    
    for epoch in range(n_epochs):
        # Report the current MSE every 100 epochs.
        if epoch % 100 == 0:
            print ("Epoch", epoch, "MSE = ", mse.eval())
        session.run(training_op)
    best_theta = theta.eval()

Epoch 0 MSE =  12.408
Epoch 100 MSE =  0.755197
Epoch 200 MSE =  0.542087
Epoch 300 MSE =  0.53317
Epoch 400 MSE =  0.530538
Epoch 500 MSE =  0.528797
Epoch 600 MSE =  0.527548
Epoch 700 MSE =  0.52665
Epoch 800 MSE =  0.526001
Epoch 900 MSE =  0.525533


## Using autodiff

In [13]:
# Same batch gradient descent as the manual version, except that the gradient
# node is derived automatically by tf.gradients() (autodiff).
reset_graph()
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# tf.gradients returns one gradient tensor per listed variable; [0] selects
# the gradient of mse with respect to theta.
gradients = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as session:
    session.run(init)
    
    for epoch in range(n_epochs):
        # Report the current MSE every 100 epochs.
        if epoch % 100 == 0:
            print ("Epoch", epoch, "MSE = ", mse.eval())
        session.run(training_op)
    best_theta = theta.eval()

Epoch 0 MSE =  12.408
Epoch 100 MSE =  0.755197
Epoch 200 MSE =  0.542087
Epoch 300 MSE =  0.53317
Epoch 400 MSE =  0.530538
Epoch 500 MSE =  0.528797
Epoch 600 MSE =  0.527548
Epoch 700 MSE =  0.52665
Epoch 800 MSE =  0.526001
Epoch 900 MSE =  0.525533


## Using an Optimizer

In [14]:
# TensorFlow also ships ready-made optimizers, including one for gradient
# descent. Using them makes it easy to try a different optimizer on the same
# model (e.g. MomentumOptimizer) without re-implementing the update rule.
reset_graph()
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# The optimizer replaces the hand-written gradient and assign nodes:
# minimize(mse) returns the training op.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as session:
    session.run(init)
    
    for epoch in range(n_epochs):
        # Report the current MSE every 100 epochs.
        if epoch % 100 == 0:
            print ("Epoch", epoch, "MSE = ", mse.eval())
        session.run(training_op)
    best_theta = theta.eval()

Epoch 0 MSE =  12.408
Epoch 100 MSE =  0.755197
Epoch 200 MSE =  0.542087
Epoch 300 MSE =  0.53317
Epoch 400 MSE =  0.530538
Epoch 500 MSE =  0.528797
Epoch 600 MSE =  0.527548
Epoch 700 MSE =  0.52665
Epoch 800 MSE =  0.526001
Epoch 900 MSE =  0.525533


# Feeding Data to the Training algorithm

In [15]:
# A placeholder node performs no computation: it only outputs the data that
# is fed to it at run time through feed_dict. shape=(None, 3) declares
# 3 columns and leaves the number of rows open.
A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5

with tf.Session() as sess:
    # The same node B is evaluated with a 1-row and then a 2-row input.
    B_val_1 = B.eval(feed_dict={A: [[1, 2, 3]]})
    B_val_2 = B.eval(feed_dict={A: [[4, 5, 6], [7, 8, 9]]})
print(B_val_1)
print(B_val_2)

[[ 6.  7.  8.]]
[[  9.  10.  11.]
 [ 12.  13.  14.]]


## Implementing mini batch gradient descent


In [16]:
reset_graph()
n_epochs = 1000
learning_rate = 0.01
batch_size = 100
# Number of mini-batches per epoch: m / batch_size, rounded up.
n_batches = int(np.ceil(m / batch_size))

# X and y are placeholders here, so a different mini-batch can be fed at
# every training step. The leading None leaves the batch size open.
X = tf.placeholder(tf.float32, shape=[None, n + 1], name="X")
y = tf.placeholder(tf.float32, shape=[None, 1], name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# Gradient descent optimizer; minimize(mse) returns the training op.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

def fetch_batch(epoch, batch_index, batch_size):
    """Return a reproducible random mini-batch as ``(X_batch, y_batch)``.

    NumPy's global seed is derived from the epoch and the batch position, so
    the same ``(epoch, batch_index)`` pair always selects the same rows.
    ``batch_size`` row indices are drawn at random from ``range(m)``; an
    index can occur more than once within a batch.
    """
    np.random.seed(epoch * n_batches + batch_index)
    row_ids = np.random.randint(m, size=batch_size)
    # Targets as a single-column matrix, matching the shape of placeholder y.
    target_column = housing.target.reshape(-1, 1)
    return scaled_housing_data_plus_bias[row_ids], target_column[row_ids]

init = tf.global_variables_initializer()

with tf.Session() as session:
    session.run(init)
    
    for epoch in range(n_epochs):
        # One training step per mini-batch; the batch is supplied to the
        # X and y placeholders through feed_dict.
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            session.run(training_op, feed_dict={X: X_batch, y: y_batch})
    best_theta = theta.eval()
# Display the trained parameters as the cell output.
best_theta

array([[ 2.07144761],
       [ 0.84620118],
       [ 0.11558535],
       [-0.26835832],
       [ 0.32982782],
       [ 0.00608358],
       [ 0.07052915],
       [-0.87988573],
       [-0.86342508]], dtype=float32)

# Saving and restoring models

Besides the variable values, the Saver's _save()_ method also saves the structure of the graph, in a second file with the same name plus a .meta extension.


In [17]:
# Rebuild the batch gradient descent graph (optimizer version) that the
# save / restore cells below operate on.
reset_graph()
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# Gradient descent optimizer; minimize(mse) returns the training op.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

In [18]:
# Saving a model: the Saver is created once the graph has been built.
saver = tf.train.Saver()

with tf.Session() as session:
    session.run(init)
    
    for epoch in range(n_epochs):
        # Checkpoint every 100 epochs. The same path is passed each time.
        if epoch % 100 == 0:
            save_path = saver.save(session, "mytf_model.cpkt")
            
        session.run(training_op)
    best_theta = theta.eval()
    # Save the fully trained model under a separate name.
    save_path = saver.save(session, "mytf_model_final.cpkt")

In [19]:
# Restoring a model: restore() is called in place of running the init node,
# loading the variable values saved in the final checkpoint.
saver = tf.train.Saver()

with tf.Session() as session:
    saver.restore(session, "mytf_model_final.cpkt")

INFO:tensorflow:Restoring parameters from mytf_model_final.cpkt


# Visualizing the Graph and Training Curves using TensorBoard

In [37]:
from datetime import datetime
# Build a log directory name from the current UTC timestamp, so the name
# differs each time this cell is executed (to the second).
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}".format(root_logdir, now)

In [38]:
# Mini-batch gradient descent graph, rebuilt here so that its training can be
# logged for TensorBoard.
reset_graph()
n_epochs = 1000
learning_rate = 0.01
batch_size = 100
# Number of mini-batches per epoch: m / batch_size, rounded up.
n_batches = int(np.ceil(m / batch_size))

# Placeholders let a different mini-batch be fed at every training step.
X = tf.placeholder(tf.float32, shape=[None, n + 1], name="X")
y = tf.placeholder(tf.float32, shape=[None, 1], name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

def fetch_batch(epoch, batch_index, batch_size):
    """Pick a random, repeatable mini-batch and return ``(X_batch, y_batch)``.

    The global NumPy seed is set from ``epoch * n_batches + batch_index``, so
    every (epoch, batch_index) pair maps to a fixed set of row indices.
    The ``batch_size`` indices are drawn from ``range(m)`` and may repeat.
    """
    np.random.seed(epoch * n_batches + batch_index)
    chosen_rows = np.random.randint(m, size=batch_size)
    features = scaled_housing_data_plus_bias[chosen_rows]
    # Reshape the targets to one column before selecting the same rows.
    targets = housing.target.reshape(-1, 1)[chosen_rows]
    return features, targets

init = tf.global_variables_initializer()

# Store info for TensorBoard: mse_summary is a node that evaluates the MSE as
# a scalar summary named 'MSE'; file_writer writes to logdir and is given the
# current default graph.
mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

with tf.Session() as session:
    session.run(init)
    
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            # Log a summary for every 10th mini-batch of the epoch.
            if batch_index % 10 == 0:
                summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})
                # step = number of mini-batches processed since training began
                step = epoch * n_batches + batch_index
                file_writer.add_summary(summary_str, step)

            session.run(training_op, feed_dict={X: X_batch, y: y_batch})
    best_theta = theta.eval()
    # Close the writer once training has finished.
    file_writer.close()
best_theta

array([[ 2.07144761],
       [ 0.84620118],
       [ 0.11558535],
       [-0.26835832],
       [ 0.32982782],
       [ 0.00608358],
       [ 0.07052915],
       [-0.87988573],
       [-0.86342508]], dtype=float32)

# Name Scopes
A name scope groups related nodes under a common name prefix (e.g. `loss/`).

In [39]:
# Define the error and MSE ops again, this time inside the "loss" name scope.
# `error` and `mse` are rebound to the newly created ops.
with tf.name_scope("loss") as scope:
    error = y_pred - y
    mse = tf.reduce_mean(tf.square(error), name="mse")

In [40]:
# Show the names of the ops created inside the "loss" name scope.
print(error.op.name)
print(mse.op.name)

loss/sub
loss/mse


# Modularity

In [49]:
# TF add a _{number} for each time relu func is exec.
def relu(X):
    w_shape = (int(X.get_shape()[1]), 1)
    w = tf.Variable(tf.random_normal(w_shape), name="weights")
    b = tf.Variable(0.0, name="bias")
    z = tf.add(tf.matmul(X, w), b, name="z")
    return tf.maximum(z, 0., name="relu")

In [50]:
n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
# Build five independent ReLU units on the same input and sum their outputs.
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name="output")

# Sharing Variables
1. Pass the shared variable to the function as a parameter
2. Keep a dictionary containing all the variables
3. Create a class for each module
4. Set the shared variable as an attribute of the function on its first call (check with _hasattr_)
5. Use *get_variable()* to create or reuse the shared variable. Whether it creates or reuses is controlled by the *reuse* attribute of the current *variable_scope()*

In [82]:
# Example 5: generates an error if the variable has been already created

with tf.variable_scope("relu"):
    threshold = tf.get_variable("threshold", shape=(), initializer=tf.constant_initializer(0.0))

ValueError: Variable relu/threshold already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:

  File "<ipython-input-80-375009a313ff>", line 3, in <module>
    threshold = tf.get_variable("threshold", shape=(), initializer=tf.constant_initializer(0.0))
  File "/Users/cuent/anaconda/envs/book/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2847, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/Users/cuent/anaconda/envs/book/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2787, in run_ast_nodes
    if self.run_code(code, result):


In [88]:
# Two equivalent ways to fetch the existing variable "relu/threshold" instead
# of creating a new one. Both require that the variable already exists.

# Option 1: open the scope with reuse=True.
with tf.variable_scope("relu", reuse=True):
    threshold = tf.get_variable("threshold")

# Option 2: open the scope normally, then switch it to reuse mode inside the block.
with tf.variable_scope("relu") as scope:
    scope.reuse_variables()
    threshold = tf.get_variable("threshold")

## Call shared variable from a method

In [94]:
def relu_sv(X):
    """Add one ReLU-like unit on ``X`` whose cutoff is a shared variable.

    The threshold is looked up with get_variable() in the "relu_sv" scope
    opened with reuse=True, so "threshold_sv" must have been created before
    the first call. Weights and bias are created anew on every call.
    Returns max(X . weights + bias, threshold).
    """
    with tf.variable_scope("relu_sv", reuse=True):
        shared_threshold = tf.get_variable("threshold_sv")  # existing variable, not a new one
        n_inputs = int(X.get_shape()[1])
        weights = tf.Variable(tf.random_normal((n_inputs, 1)), name="weights")
        bias = tf.Variable(0.0, name="bias")
        linear = tf.matmul(X, weights)
        z = tf.add(linear, bias, name="z")
        return tf.maximum(z, shared_threshold, name="max")

In [95]:
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
# Create the shared variable once, before any unit is built: relu_sv() opens
# the "relu_sv" scope with reuse=True and therefore expects it to exist.
with tf.variable_scope("relu_sv"):
    threshold = tf.get_variable("threshold_sv", shape=(), initializer=tf.constant_initializer(0.0))

# Build five units with relu_sv(), the variant that reads the shared
# threshold, and sum their outputs. Calling relu() here would ignore
# "threshold_sv" entirely.
relus = [relu_sv(X) for relu_index in range(5)]
output = tf.add_n(relus, name="output")

# Exercises

Implement Logistic Regression with Mini-batch Gradient Descent using TensorFlow. Train it and evaluate it on the moons dataset. Try adding all the bells and whistles: 
- Define the graph within a logistic_regression() function that can be reused easily. 
- Save checkpoints using a Saver at regular intervals during training, and save the final model at the end of training. 
- Restore the last checkpoint upon startup if training was interrupted. 
- Define the graph using name scopes so the graph looks good in TensorBoard. 
- Add summaries to visualize the learning curves in TensorBoard. 
- Try tweaking some hyperparameters such as the learning rate or the mini-batch size and look at the shape of the learning curve.