# Chapter 9 Up and Running with TensorFlow

In [1]:
import tensorflow as tf
import numpy as np

## Linear Regression 

In [2]:
from sklearn.datasets import fetch_california_housing

# Load the California housing data set; its data matrix has m rows and n columns.
housing = fetch_california_housing()
m, n = housing.data.shape
# Prepend a column of ones so the first weight plays the role of the bias term.
housing_data_plus_bias = np.hstack([np.ones((m, 1)), housing.data])

X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')
XT = tf.transpose(X)
# Normal equation, built in two steps: theta = (X^T X)^-1 X^T y
gram_inverse = tf.matrix_inverse(tf.matmul(XT, X))
theta = tf.matmul(tf.matmul(gram_inverse, XT), y)

# Run the graph once to get the solution back as a NumPy array.
with tf.Session() as sess:
    theta_value = sess.run(theta)

theta_value

Downloading Cal. housing from https://ndownloader.figshare.com/files/5976036 to /Users/petersaur/scikit_learn_data


array([[-3.6845909e+01],
       [ 4.3667579e-01],
       [ 9.4581824e-03],
       [-1.0708507e-01],
       [ 6.4334780e-01],
       [-3.9223705e-06],
       [-3.7883162e-03],
       [-4.2040566e-01],
       [-4.3347809e-01]], dtype=float32)

Let's look at the different ways to calculate gradients.

## Batch Gradient Descent

Here, we will do so explicitly, computing the gradients by hand from the closed-form expression for the gradient of the MSE cost function.

In [3]:
# Scale data for gradient descent
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the raw features, then apply it to the same data.
scaler = StandardScaler()
scaler.fit(housing.data)
scaled_housing_data = scaler.transform(housing.data)
# Prepend the bias column of ones to the scaled features.
scaled_housing_data_plus_bias = np.hstack([np.ones((m, 1)), scaled_housing_data])

In [4]:
tf.reset_default_graph()

# Hyper-parameters; the autodiff and optimizer cells further down reuse them.
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')
initial_theta = tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42)
theta = tf.Variable(initial_theta, name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')
# Hand-derived gradient of the MSE with respect to theta: (2/m) * X^T (X theta - y)
gradients = 2/m * tf.matmul(tf.transpose(X), error)
# One descent step: theta <- theta - learning_rate * gradients
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            # Report the current loss every 100 epochs.
            print('Epoch', epoch, 'MSE: ', sess.run(mse))
        sess.run(training_op)

    best_theta = sess.run(theta)

best_theta

Instructions for updating:
Colocations handled automatically by placer.
Epoch 0 MSE:  2.754427
Epoch 100 MSE:  0.63222194
Epoch 200 MSE:  0.5727803
Epoch 300 MSE:  0.5585008
Epoch 400 MSE:  0.54907006
Epoch 500 MSE:  0.542288
Epoch 600 MSE:  0.5373791
Epoch 700 MSE:  0.533822
Epoch 800 MSE:  0.53124255
Epoch 900 MSE:  0.5293705


array([[ 2.06855226e+00],
       [ 7.74078071e-01],
       [ 1.31192386e-01],
       [-1.17845066e-01],
       [ 1.64778143e-01],
       [ 7.44081801e-04],
       [-3.91945131e-02],
       [-8.61356556e-01],
       [-8.23479712e-01]], dtype=float32)

## Autodiff

Alternatively, we can have TensorFlow compute the gradients for us automatically using autodiff.

In [5]:
tf.reset_default_graph()

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')
initial_theta = tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42)
theta = tf.Variable(initial_theta, name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')
# The only change from the manual version: TensorFlow derives d(mse)/d(theta).
# tf.gradients returns one tensor per requested variable; we asked for theta only.
gradients = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            # Report the current loss every 100 epochs.
            print('Epoch', epoch, 'MSE: ', sess.run(mse))
        sess.run(training_op)

    best_theta = sess.run(theta)

best_theta

Epoch 0 MSE:  2.754427
Epoch 100 MSE:  0.63222194
Epoch 200 MSE:  0.5727803
Epoch 300 MSE:  0.5585009
Epoch 400 MSE:  0.54907006
Epoch 500 MSE:  0.542288
Epoch 600 MSE:  0.5373791
Epoch 700 MSE:  0.533822
Epoch 800 MSE:  0.53124255
Epoch 900 MSE:  0.5293704


array([[ 2.06855249e+00],
       [ 7.74078071e-01],
       [ 1.31192386e-01],
       [-1.17845066e-01],
       [ 1.64778143e-01],
       [ 7.44078017e-04],
       [-3.91945094e-02],
       [-8.61356676e-01],
       [-8.23479772e-01]], dtype=float32)

## Using an Optimizer

In [6]:
tf.reset_default_graph()

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
initial_theta = tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42)
theta = tf.Variable(initial_theta, name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# The gradient computation and the assign step are both replaced by an optimizer.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            # Report the current loss every 100 epochs.
            print("Epoch", epoch, "MSE: ", sess.run(mse))
        sess.run(training_op)

    best_theta = sess.run(theta)

best_theta

Epoch 0 MSE:  2.754427
Epoch 100 MSE:  0.63222194
Epoch 200 MSE:  0.5727803
Epoch 300 MSE:  0.5585009
Epoch 400 MSE:  0.54907006
Epoch 500 MSE:  0.542288
Epoch 600 MSE:  0.5373791
Epoch 700 MSE:  0.533822
Epoch 800 MSE:  0.53124255
Epoch 900 MSE:  0.5293704


array([[ 2.06855249e+00],
       [ 7.74078071e-01],
       [ 1.31192386e-01],
       [-1.17845066e-01],
       [ 1.64778143e-01],
       [ 7.44078017e-04],
       [-3.91945094e-02],
       [-8.61356676e-01],
       [-8.23479772e-01]], dtype=float32)

## Mini-Batch Gradient Descent

If we'd like to change the values of the data we feed into our algorithm (like we do with mini-batches), we can use placeholder nodes. 

In [7]:
tf.reset_default_graph()

# Placeholders: the data for each training step is supplied through feed_dict.
# The leading None leaves the number of rows per batch unspecified.
X = tf.placeholder(tf.float32, shape=[None, n + 1], name='X')
y = tf.placeholder(tf.float32, shape=[None, 1], name='y')
initial_theta = tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42)
theta = tf.Variable(initial_theta, name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)
init = tf.global_variables_initializer()

In [8]:
n_epochs = 10
batch_size = 100
n_batches = int(np.ceil(m / batch_size))


def create_batch(batch_size, seed=None):
    """Draw a random mini-batch (rows sampled with replacement) from the scaled data.

    Args:
        batch_size: Number of rows to draw.
        seed: Optional integer seed. When given, the row indices come from a
            dedicated ``np.random.RandomState(seed)``, so the same seed always
            selects the same rows. When ``None`` (the default) the global NumPy
            random state is used, which is the original behavior.

    Returns:
        A tuple ``(X_batch, y_batch)``: the selected rows of
        ``scaled_housing_data_plus_bias`` and the matching rows of the target
        reshaped to a single column.
    """
    rng = np.random if seed is None else np.random.RandomState(seed)
    random_indices = rng.randint(m, size=batch_size)
    X_batch = scaled_housing_data_plus_bias[random_indices]
    y_batch = housing.target.reshape(-1, 1)[random_indices]

    return X_batch, y_batch


with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        for batch in range(n_batches):
            # A distinct, deterministic seed per training step makes the whole
            # run reproducible under Restart & Run All.
            X_batch, y_batch = create_batch(batch_size, seed=epoch * n_batches + batch)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

    best_theta = theta.eval()

best_theta

array([[ 2.0695038 ],
       [ 0.8283032 ],
       [ 0.10710511],
       [-0.2810113 ],
       [ 0.19031253],
       [-0.00921723],
       [-0.04682321],
       [-0.9058674 ],
       [-0.88086665]], dtype=float32)

## Saving and Restoring Models

To save and restore models, we can create a Saver node after creating all our variable nodes and call its save method whenever we want to store the current contents of our model. Restoring the model is as simple as calling the node's restore method.

In [9]:
tf.reset_default_graph()

# n_epochs was lowered to 10 for the mini-batch run earlier in the notebook.
# Full-batch gradient descent needs far more steps, and the periodic checkpoint
# below would otherwise fire only once (at epoch 0), so this cell uses its own
# epoch count and leaves n_epochs untouched for the cells that follow.
n_batch_gd_epochs = 1000

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name='y')
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0, seed=42), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')
gradients = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

# Create the Saver node after all variables exist so it knows what to save.
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_batch_gd_epochs):
        if epoch % 100 == 0:
            # Intermediate checkpoint every 100 epochs (overwrites the previous one).
            saver.save(sess, '/tmp/temp_model.ckpt')
        sess.run(training_op)

    best_theta = theta.eval()
    # Final checkpoint holding the fully trained parameters.
    saver.save(sess, '/tmp/final_model.ckpt')

In [10]:
# Shell escape: list every file under /tmp whose name contains "model".
! ls -al /tmp/*model*

-rw-r--r--  1 petersaur  wheel      36 Jun  4 16:27 /tmp/final_model.ckpt.data-00000-of-00001
-rw-r--r--  1 petersaur  wheel     129 Jun  4 16:27 /tmp/final_model.ckpt.index
-rw-r--r--  1 petersaur  wheel  832944 Jun  4 16:27 /tmp/final_model.ckpt.meta
-rw-r--r--  1 petersaur  wheel      36 Jun  4 16:27 /tmp/temp_model.ckpt.data-00000-of-00001
-rw-r--r--  1 petersaur  wheel     129 Jun  4 16:27 /tmp/temp_model.ckpt.index
-rw-r--r--  1 petersaur  wheel  832944 Jun  4 16:27 /tmp/temp_model.ckpt.meta


## Visualizing the Graph

In [11]:
from datetime import datetime

# Give every run its own timestamped sub-directory under the log root.
logdir_root = 'tf_logs'
current_time = '{:%Y%m%d%H%M%S}'.format(datetime.now())
logdir = '/'.join([logdir_root, current_time])
print(logdir)

tf_logs/20190604162738


In [12]:
# Start from an empty graph. Without this, the nodes left over from the Saver
# example stay in the default graph: they would be written to the TensorBoard
# log as well, and the nodes created here would get de-duplicated names
# (X_1, theta_1, ...).
tf.reset_default_graph()

X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

In [13]:
# Scalar summary node for the loss, and a writer that also logs the graph itself.
mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

try:
    with tf.Session() as sess:
        sess.run(init)

        for epoch in range(n_epochs):
            for batch_number in range(n_batches):
                X_batch, y_batch = create_batch(batch_size)
                if batch_number % 10 == 0:  # every 10th batch
                    summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})
                    # Global step = number of batches processed so far. It must be
                    # based on n_batches (batches per epoch), not batch_size,
                    # otherwise steps from different epochs overlap.
                    step = epoch * n_batches + batch_number
                    file_writer.add_summary(summary_str, step)
                sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

        best_theta = theta.eval()
finally:
    # Flush and close the event file even if training raises.
    file_writer.close()

## Name Scopes

Scopes will group nodes into one larger collapsible namespace on TensorBoard. For example, to collapse error and mse into one namespace called 'loss', we could write the following where they're defined.

In [14]:
# Create the error and MSE ops inside a name scope called 'loss'.
# NOTE: this rebinds `error` and `mse` to newly created ops; the `training_op`
# built in an earlier cell is not rebuilt here.
with tf.name_scope('loss') as scope:
    error = y_pred - y
    mse = tf.reduce_mean(tf.square(error), name='mse')

## Modularity

We can combine name scopes with the idea of modularizing repeated tasks to write better code. Let's create five ReLUs and sum their output. 

In [15]:
tf.reset_default_graph()


def relu(X):
    """Build one rectified linear unit on top of X inside a 'relu' name scope.

    Creates a new weight column (one row per column of X) and a new scalar
    bias, computes z = X.w + b and returns the element-wise max(0, z).
    """
    with tf.name_scope('relu'):
        n_inputs = int(X.get_shape()[1])
        w = tf.Variable(tf.random_normal((n_inputs, 1)), name='weights')
        b = tf.Variable(0.0, name='bias')
        linear = tf.matmul(X, w)
        z = tf.add(linear, b, name='z')
        return tf.maximum(0., z, name='max')


n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name='X')
# Five ReLUs, each with its own weights and bias, over the same input; their outputs are summed.
relus = [relu(X) for _ in range(5)]
output = tf.add_n(relus, name='output')

## Sharing Variables

If we'd like to avoid sharing variables by passing them as parameters every time we need them, we can instead use the get_variable() function to share variables between different parts of our graph, passing reuse=True to the variable scope or calling scope.reuse_variables() to use a variable that has already been defined.

In [16]:
tf.reset_default_graph()


def relu(x):
    """Build one ReLU on top of `x` whose threshold is a shared variable.

    The threshold is obtained with tf.get_variable, so it is created or reused
    according to the variable scope active at call time. The weights and bias
    are plain tf.Variable objects and are therefore new on every call.

    Args:
        x: 2-D input tensor; its second dimension must be statically known.

    Returns:
        The element-wise maximum of the threshold and z = x.w + b.
    """
    threshold = tf.get_variable('threshold', shape=(),
                                initializer=tf.constant_initializer(0.0))
    # Use the argument `x`, not the notebook-global `X`, so the function works
    # for whatever tensor the caller passes in.
    w_shape = (int(x.get_shape()[1]), 1)
    w = tf.Variable(tf.random_normal(w_shape), name='weights')
    b = tf.Variable(0.0, name='bias')
    z = tf.add(tf.matmul(x, w), b, name='z')
    return tf.maximum(threshold, z, name='max')


X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = []
for relu_index in range(5):
    # The first iteration creates 'relu/threshold'; later iterations reuse it.
    with tf.variable_scope("relu", reuse=(relu_index >= 1)) as scope:
        relus.append(relu(X))
output = tf.add_n(relus, name="output")