In [1]:
# import tensorflow as tf
# TF 2.0 is very different from TF 1.0
# Doing things the old way first, then converting


import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# Construction phase: these statements only add nodes to the default graph.
# With v2 behavior disabled, nothing is computed until the graph is run
# inside a session.
x = tf.Variable(3, name="x")
y = tf.Variable(4, name="y")

# f = x^2 * y + y + 2, i.e. 42 once x=3 and y=4 have been initialized.
f = x*x*y + y + 2

Instructions for updating:
non-resource variables are not supported in the long term


In [None]:
# Execution phase, done by hand: open a session, run each variable's
# initializer individually, evaluate f, then release the session explicitly.
sess = tf.Session()
sess.run(x.initializer)
sess.run(y.initializer)

result = sess.run(f)

print(result)

# A session created without a `with` block has to be closed manually.
sess.close()

In [None]:
# Works the same as the explicit sess.run() calls, but inside the `with`
# block the session is set as the default session, so `op.run()` and
# `tensor.eval()` can be called without naming the session.
# The session is closed automatically when the block exits.

with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()

In [None]:
# Build one node that initializes every variable in the graph, instead of
# running each variable's initializer separately.
init = tf.global_variables_initializer() # prepare an init node

with tf.Session() as sess:
    init.run() # actually initialize all the variables
    result = f.eval()

In [None]:
# An InteractiveSession is used here without a `with` block: `init.run()` and
# `f.eval()` pick it up as the default session. Because nothing closes it
# automatically, it must be closed by hand at the end.
# Relies on `init` and `f` defined in earlier cells.
sess = tf.InteractiveSession()
init.run()
result = f.eval()
print(result)
sess.close()

In [None]:
# Tensorflow program is usually split into 2 parts:
#  1) "Construction Phase" -> Build a computation graph
#  2) "Execution Phase"    -> Run the graph

# Construction phase builds a computation graph representing the ML model
# and the computations required to train it

# Execution phase runs a loop that evaluates the training step repeatedly,
# gradually improving model parameters (e.g. 1 step per mini-batch)

In [None]:
# Managing Graphs:
# A node created without an explicit graph is added to the default graph.

x1 = tf.Variable(1)
x1.graph is tf.get_default_graph()
# True

In [None]:
# Create an independent graph and make it the default only inside the
# `with` block, so x2 is added to `graph` rather than the global default.
graph = tf.Graph()
with graph.as_default():
    x2 = tf.Variable(2)
    
x2.graph is graph
# True

x2.graph is tf.get_default_graph()
# False

# NOTE: a notebook cell only echoes its last expression, so the two
# comparisons above are not displayed when this cell is run as-is.
# Start over with a fresh default graph for the cells that follow.
tf.reset_default_graph()

In [None]:
# Lifecycle of a Node Value:
#   - When evaluating a node, TF determines the set of nodes it depends on and evaluates those first
#   - Note that `x` and `y` are rebound here; they no longer refer to the
#     variables defined in the first cell.

w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

with tf.Session() as sess:
    print(y.eval()) # 10
    print(z.eval()) # 15
    
# When evaluating y, TF sees that y depends on x, which in turn depends on w,
# so w is evaluated first, then x, then y.

In [None]:
# With two separate eval() calls (previous cell), TF evaluates w and x twice:
# once for y and once for z.
# To evaluate both efficiently, fetch y and z in a single graph run:

with tf.Session() as sess:
    y_val, z_val = sess.run([y, z])
    print(y_val)
    print(z_val)

In [None]:
# Linear Regression with Tensorflow:
#

In [2]:
import numpy as np
from sklearn.datasets import fetch_california_housing

In [None]:
# Load the California housing dataset; m = number of rows, n = number of
# feature columns of housing.data.
housing = fetch_california_housing()

m, n = housing.data.shape
# Prepend a column of ones so that the first weight acts as the bias term.
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
# reshape(-1, 1) gives the targets a single-column 2-D shape so they can be
# used in matrix products with X.
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
XT = tf.transpose(X)

# Normal Equation: theta = (X^T X)^-1 X^T y
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

with tf.Session() as sess:
    theta_value = theta.eval()
    
    
# Main benefit of computing the Normal Equation with TF rather than with
# NumPy: TF will run this on the GPU card if TF GPU is installed

In [None]:
# Implementing Gradient Descent:


In [None]:
# Scale data before computing Gradient Descent:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()


# Only the raw features are passed through the scaler; the bias column of
# ones is prepended afterwards, so it is left unscaled.
housing_scaled = scaler.fit_transform(housing.data)
housing_data_plus_bias_scaled = np.c_[np.ones((m, 1)), housing_scaled]

In [None]:
# Manually Computing Gradients:
# Batch gradient descent on the full (scaled) dataset, with the MSE gradient
# written out by hand.

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(housing_data_plus_bias_scaled, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")

# theta: (n + 1, 1) weight column, randomly initialized between -1.0 and 1.0
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y

mse = tf.reduce_mean(tf.square(error), name="mse")

# Gradient of the MSE with respect to theta: (2/m) * X^T (X theta - y)
gradients = 2/m * tf.matmul(tf.transpose(X), error)
# One gradient descent step: theta <- theta - learning_rate * gradients
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            # progress report every 100 epochs, taken before this epoch's update
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
        
    # read the trained weights back while the session is still open
    best_theta = theta.eval()
        

In [None]:
# Autodiff:
#   "symbolic differentiation"
# Same batch gradient descent as the manual version, except that the gradient
# node is derived from the mse node by tf.gradients instead of by hand.

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(housing_data_plus_bias_scaled, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y

mse = tf.reduce_mean(tf.square(error), name="mse")

# tf.gradients returns one gradient tensor per entry of the variable list;
# [0] selects the gradient of mse with respect to theta.
gradients = tf.gradients(mse, [theta])[0] # This line changed
# gradients = 2/m * tf.matmul(tf.transpose(X), error) 

training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            # progress report every 100 epochs, taken before this epoch's update
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
        
    best_theta = theta.eval()
        

In [None]:
# Using an Optimizer:
# The optimizer replaces both the explicit gradient computation and the
# explicit tf.assign update step used in the previous two cells.

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(housing_data_plus_bias_scaled, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y

mse = tf.reduce_mean(tf.square(error), name="mse")

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate) # These lines changed
# minimize(mse) returns the op that performs one update step when run
training_op = optimizer.minimize(mse) 
# gradients = 2/m * tf.matmul(tf.transpose(X), error)
# training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            # progress report every 100 epochs, taken before this epoch's update
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
        
    best_theta = theta.eval()
        

In [None]:
# Different optimizer
# Identical to the GradientDescentOptimizer cell except for the optimizer
# class; the rest of the graph and the training loop are unchanged.

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(housing_data_plus_bias_scaled, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y

mse = tf.reduce_mean(tf.square(error), name="mse")

# Optimizers are interchangeable: only the constructor call changes.
# The momentum optimizer typically converges faster than plain Gradient Descent.
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9) # This line changed
# optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)

training_op = optimizer.minimize(mse) 

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            # progress report every 100 epochs, taken before this epoch's update
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
        
    best_theta = theta.eval()
        

In [None]:
# Vocab:
#     Tensor
#     Session
#     Computation Graph
#         Organization of Nodes defining a computational structure
#     Node
#     Placeholder Node
#     Optimizer
#     Construction Phase
#     Execution Phase
#     Saver node

In [None]:
# Feeding Data to the Training Algorithm:


In [None]:
# A placeholder has no value of its own: the value is supplied through
# feed_dict when the graph is run. shape=(None, 3) leaves the number of rows
# open and fixes the number of columns to 3.
A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5

with tf.Session() as sess:
    # the same node B is evaluated with a 1-row and then a 2-row input
    B_val_1 = B.eval(feed_dict={A: [[1, 2, 3]]})
    B_val_2 = B.eval(feed_dict={A: [[4, 5 ,6], [7, 8, 9]]})
    
print('B_val_1:', B_val_1)
print('B_val_2:', B_val_2)

In [None]:
# Mini-batch Gradient Descent:
#     X and y are placeholders so that a different mini-batch can be fed in
#     at every training step.
n_epochs = 1000
learning_rate = 0.01

# These definitions changed
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
# X = tf.constant(housing_data_plus_bias_scaled, dtype=tf.float32, name="X")
# y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")

# These lines added
batch_size = 100
n_batches = int(np.ceil(m / batch_size))  # the last batch counts even if partial


theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y

# The loss and the training op must be rebuilt on top of the placeholders.
# Without these lines `training_op` would still be the op left over from the
# previous cell, which updates that cell's theta and ignores the fed batches.
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

# Execution changed
def fetch_batch(epoch, batch_index, batch_size):
    """Return one random mini-batch (X_batch, y_batch) of the scaled housing data.

    The NumPy global RNG is re-seeded from (epoch, batch_index), so a given
    step always receives the same batch. Rows are drawn with replacement from
    the m training instances; both returned arrays have `batch_size` rows.
    """
    np.random.seed(epoch * n_batches + batch_index)
    indices = np.random.randint(m, size=batch_size)
    X_batch = housing_data_plus_bias_scaled[indices]
    y_batch = housing.target.reshape(-1, 1)[indices]
    return X_batch, y_batch

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            # every step is run on the freshly drawn mini-batch
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

    # theta does not depend on the placeholders, so no feed_dict is needed
    best_theta = theta.eval()




In [None]:
# Saving and Restoring Models:
# Same momentum-optimizer training as before, with a Saver added so the
# variables are checkpointed during training and once more at the end.

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(housing_data_plus_bias_scaled, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")


theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y

mse = tf.reduce_mean(tf.square(error), name="mse")

optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9) # This line changed
training_op = optimizer.minimize(mse) 

init = tf.global_variables_initializer()
# The Saver is created at the end of the construction phase, after the
# variable nodes it is meant to save have been defined.
saver = tf.train.Saver() # This is new => Create Saver node

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0: # checkpoint every 100 epochs
            # the same path is reused, so each checkpoint replaces the previous one
            save_path = saver.save(sess, "/tmp/my_model.ckpt") # save checkpoint

        sess.run(training_op)
        
    best_theta = theta.eval()
    save_path = saver.save(sess, "/tmp/my_model_final.ckpt") # save final model


In [None]:
# Restore all variables under their own name.
# restore() is called in place of running the init node: the variable values
# come from the checkpoint written by the training cell.
with tf.Session() as sess:
    saver.restore(sess, "/tmp/my_model_final.ckpt")

In [None]:
# Save or restore only the "theta" variable, stored under the name "weights"
# in the checkpoint. NOTE: this rebinds `saver`, replacing the Saver created
# in the training cell.
saver = tf.train.Saver({"weights": theta})

In [None]:
# Meta graph file:
#     Importing this adds the graph to the default graph,
#     and returns a Saver instance that can be used to restore the graph's state (aka the variable values)
#     The ".meta" file sits next to the checkpoint written to the same path.

saver = tf.train.import_meta_graph("/tmp/my_model_final.ckpt.meta")

with tf.Session() as sess:
    saver.restore(sess, "/tmp/my_model_final.ckpt")


In [None]:
# Visualizing the Graph and Training Curves Using TensorBoard:
# Construction phase of a mini-batch training graph, plus a summary node and
# a FileWriter that log the MSE for TensorBoard.


## new ##
from datetime import datetime

# Timestamp (UTC) used to give every run its own log directory, so the
# events of different runs are not written into the same directory.
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")

root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

## /new ##

n_epochs = 1000
learning_rate = 0.01

# Placeholders: the mini-batches are fed in at run time.
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

batch_size = 100
n_batches = int(np.ceil(m / batch_size))

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y

mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9) # This line changed
training_op = optimizer.minimize(mse) 

init = tf.global_variables_initializer()
saver = tf.train.Saver() # This is new => Create Saver node

## new ##

# mse_summary: node that evaluates the MSE and serializes it as a summary
# string tagged 'MSE'. file_writer: writes summaries to `logdir`; it is also
# handed the default graph so the graph itself can be visualized.
mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

## /new ##


## Not shown in book ##
def fetch_batch(epoch, batch_index, batch_size):
    """Draw one reproducible random mini-batch from the scaled housing data.

    The NumPy global RNG is re-seeded from (epoch, batch_index), so the same
    step always yields the same rows. Row indices are sampled with
    replacement from range(m).

    Returns:
        (X_batch, y_batch): `batch_size` rows of the bias-augmented scaled
        features and the matching targets as a single-column array.
    """
    batch_seed = epoch * n_batches + batch_index
    np.random.seed(batch_seed)
    rows = np.random.randint(m, size=batch_size)
    target_column = housing.target.reshape(-1, 1)
    return housing_data_plus_bias_scaled[rows], target_column[rows]
## ################# ##


# Execution phase: mini-batch training with periodic MSE summaries and
# checkpoints. The try/finally guarantees that the summary writer is closed
# even when training raises or the cell is interrupted, so the events file
# is not left open.
try:
    with tf.Session() as sess:
        sess.run(init)

        for epoch in range(n_epochs):
            for batch_index in range(n_batches):
                X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
                if batch_index % 10 == 0:
                    # log the MSE of the current mini-batch every 10 batches;
                    # `step` is the global count of batches processed so far
                    summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})
                    step = epoch * n_batches + batch_index
                    file_writer.add_summary(summary_str, step)
                sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

            if epoch % 100 == 0: # checkpoint every 100 epochs
                save_path = saver.save(sess, "/tmp/my_model.ckpt") # save checkpoint

        best_theta = theta.eval()
        save_path = saver.save(sess, "/tmp/my_model_final.ckpt") # save final model
finally:
    file_writer.close()

In [None]:
# Name Scopes
# Same mini-batch training graph as the TensorBoard cell, except that the
# error and mse ops are created inside a "loss" name scope so that related
# nodes are grouped together.

from datetime import datetime

# Timestamp (UTC) used to give every run its own log directory.
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")

root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

n_epochs = 1000
learning_rate = 0.01

X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

batch_size = 100
n_batches = int(np.ceil(m / batch_size))

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")

# Ops created inside this block get names prefixed with "loss/".
with tf.name_scope("loss") as scope:
    error = y_pred - y
    mse = tf.reduce_mean(tf.square(error), name="mse")

optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9) # This line changed
training_op = optimizer.minimize(mse) 

init = tf.global_variables_initializer()
saver = tf.train.Saver() # This is new => Create Saver node

# Summary node for the MSE and the writer that stores it (and the graph)
# under `logdir`.
mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())


## Not shown in book ##
def fetch_batch(epoch, batch_index, batch_size):
    """Draw one reproducible random mini-batch from the scaled housing data.

    The NumPy global RNG is re-seeded from (epoch, batch_index), so the same
    step always yields the same rows. Row indices are sampled with
    replacement from range(m).

    Returns:
        (X_batch, y_batch): `batch_size` rows of the bias-augmented scaled
        features and the matching targets as a single-column array.
    """
    batch_seed = epoch * n_batches + batch_index
    np.random.seed(batch_seed)
    rows = np.random.randint(m, size=batch_size)
    target_column = housing.target.reshape(-1, 1)
    return housing_data_plus_bias_scaled[rows], target_column[rows]
## ################# ##


# Execution phase: mini-batch training with periodic MSE summaries and
# checkpoints. The try/finally guarantees that the summary writer is closed
# even when training raises or the cell is interrupted, so the events file
# is not left open.
try:
    with tf.Session() as sess:
        sess.run(init)

        for epoch in range(n_epochs):
            for batch_index in range(n_batches):
                X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
                if batch_index % 10 == 0:
                    # log the MSE of the current mini-batch every 10 batches;
                    # `step` is the global count of batches processed so far
                    summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})
                    step = epoch * n_batches + batch_index
                    file_writer.add_summary(summary_str, step)
                sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

            if epoch % 100 == 0: # checkpoint every 100 epochs
                save_path = saver.save(sess, "/tmp/my_model.ckpt") # save checkpoint

        best_theta = theta.eval()
        save_path = saver.save(sess, "/tmp/my_model_final.ckpt") # save final model
finally:
    file_writer.close()

In [None]:
# Modularity:

# relu: Rectified Linear Units
# Each unit computes max(X . w + b, 0).

### Repetitive version (not DRY)
# Two ReLUs are built by writing every node out twice; the only differences
# between the two copies are the numeric suffixes.
n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")

w1 = tf.Variable(tf.random_normal((n_features, 1)), name="weights1")
w2 = tf.Variable(tf.random_normal((n_features, 1)), name="weights2")
b1 = tf.Variable(0.0, name="bias1")
b2 = tf.Variable(0.0, name="bias2")

# linear part of each unit
z1 = tf.add(tf.matmul(X, w1), b1, name="z1")
z2 = tf.add(tf.matmul(X, w2), b2, name="z2")

# rectification: negative values are clipped to 0
relu1 = tf.maximum(z1, 0., name="relu1")
relu2 = tf.maximum(z2, 0., name="relu2")

output = tf.add(relu1, relu2, name="output")

In [None]:
# Same output, more modular
# Pass in repetitions to function call

def relu(X):
    """Add one ReLU unit on top of X to the graph and return its output node.

    Every call creates a fresh weights variable (random normal, one row per
    column of X) and a fresh bias variable (0.0), then returns
    max(X . w + b, 0).
    """
    n_inputs = int(X.get_shape()[1])
    w = tf.Variable(tf.random_normal((n_inputs, 1)), name="weights")
    b = tf.Variable(0.0, name="bias")
    linear = tf.matmul(X, w)
    z = tf.add(linear, b, name="z")
    return tf.maximum(z, 0., name="relu")

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
# Five ReLUs on the same input: each relu(X) call builds its own weights and
# bias variables, and add_n sums the five outputs.
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name="output")
    

In [None]:
# Sharing Variables:
#
# a) Create the shared variable once, outside the function, and pass it in
#    as an argument on every call.
def relu(X, threshold):
    """Add one ReLU unit to the graph, rectified at `threshold` instead of 0.

    Args:
        X: 2-D input tensor; its number of columns fixes the weight shape.
        threshold: tensor/variable shared by every unit built with it.

    Returns:
        The node computing max(X . w + b, threshold).
    """
    with tf.name_scope("relu"):
        w_shape = (int(X.get_shape()[1]), 1)
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X, w), b, name="z")
        return tf.maximum(z, threshold, name="max")

# These statements belong at cell level: indented under the function they
# sat after its `return` and were never executed.
threshold = tf.Variable(0.0, name="threshold")
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = [relu(X, threshold) for i in range(5)]
output = tf.add_n(relus, name="output")

In [None]:
# b)
#
# Store the shared variable as an attribute of the function itself; the
# attribute is created on the first call only and reused afterwards.
def relu(X):
    """Add one ReLU unit to the graph, rectified at the shared relu.threshold.

    Returns the node computing max(X . w + b, relu.threshold).
    """
    with tf.name_scope("relu"):
        # hasattr (not "hassattr"): create the shared threshold exactly once
        if not hasattr(relu, "threshold"):
            relu.threshold = tf.Variable(0.0, name="threshold")

        w_shape = (int(X.get_shape()[1]), 1)
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X, w), b, name="z")
        return tf.maximum(z, relu.threshold, name="max")

In [None]:
# c)
#
# scoping variable
# Inside variable_scope("relu") the variable is registered as
# "relu/threshold": a scalar (shape=()) initialized to 0.0.
with tf.variable_scope("relu"):
    threshold = tf.get_variable("threshold", shape=(),
                               initializer=tf.constant_initializer(0.0))

In [None]:
# d)
#
# scoping variable with reuse=True
# With reuse=True, get_variable looks up the existing "relu/threshold"
# instead of creating it, so no shape or initializer is passed here.
with tf.variable_scope("relu", reuse=True):
    threshold = tf.get_variable("threshold")

In [None]:
# e)
#
# Same lookup as with reuse=True, but reuse is switched on from inside the
# block by calling reuse_variables() on the scope object.
with tf.variable_scope("relu") as scope:
    scope.reuse_variables()
    threshold = tf.get_variable("threshold")

In [None]:
# f)
#
# variable definition is outside relu function

# Start from an empty default graph: "relu/threshold" was already created in
# an earlier cell, and creating it a second time without reuse raises
# ValueError.
tf.reset_default_graph()

def relu(X):
    """Add one ReLU unit to the graph, rectified at the shared "relu/threshold".

    The threshold variable must already exist when this is called; it is
    looked up (reuse=True), never created, here.
    """
    with tf.variable_scope("relu", reuse=True):
        threshold = tf.get_variable("threshold") # reuse existing variable
        w_shape = (int(X.get_shape()[1]), 1)
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X, w), b, name="z")
        return tf.maximum(z, threshold, name="max")

X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
with tf.variable_scope("relu"): # create the variable
    threshold = tf.get_variable("threshold", shape=(), initializer=tf.constant_initializer(0.0))

relus = [relu(X) for relu_index in range(5)]
output = tf.add_n(relus, name="output")

In [3]:
# g)
#
# Author recommends this approach
# Pull variable declaration inside of relu function

# Start from an empty default graph: "relu/threshold" was already created in
# an earlier cell, and the first (non-reusing) call below would otherwise
# raise ValueError.
tf.reset_default_graph()


def relu(X):
    """Add one ReLU unit to the graph, rectified at a shared threshold.

    The threshold is obtained with get_variable, so whether it is created or
    reused is decided by the variable scope the caller wraps the call in.
    """
    threshold = tf.get_variable("threshold", shape=(), initializer=tf.constant_initializer(0.0))
    w_shape = (int(X.get_shape()[1]), 1)
    w = tf.Variable(tf.random_normal(w_shape), name="weights")
    b = tf.Variable(0.0, name="bias")
    z = tf.add(tf.matmul(X, w), b, name="z")
    return tf.maximum(z, threshold, name="max")

X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = []
for relu_index in range(5):
    # First iteration: reuse=None, so the threshold is created.
    # Later iterations: reuse=True, so the existing threshold is shared.
    with tf.variable_scope("relu", reuse=(relu_index >= 1 or None)) as scope:
        relus.append(relu(X))
output = tf.add_n(relus, name="output")