# Creating my first graph and running it in a session

The following code creates the graph

In [1]:
import tensorflow as tf

x = tf.Variable(3, name="x")
y = tf.Variable(4, name="y")
f = x*x*y + y + 2

This code does not actually perform any computation. In fact, even the variables are not initialized yet. To evaluate this graph, you need to open a TensorFlow _session_ and use it to initialize the variables and evaluate **f**.

In [2]:
sess = tf.Session()
sess.run(x.initializer)
sess.run(y.initializer)
result = sess.run(f)
print (result)
sess.close()

42


Having to repeat `sess.run()` all the time is a bit cumbersome. Here is the better way.

In [3]:
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()

Instead of manually running the initializer for every single variable, you can use the `global_variables_initializer()` function

In [4]:
init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run() # actually initializer all the variables
    result = f.eval()

Inside Jupyter, you may prefer to create an `InteractiveSession`. It automatically sets itself as the default session, so you don't need a with block ( but need to close the session manually)

In [5]:
sess = tf.InteractiveSession()
init.run()
result = f.eval()
print(result)
sess.close()

42


#### A TensorFlow program is typically split into two parts: 

1. Building a computation graph, called the *construction phase*
2. Running it, *the execution phase*

In [6]:
import tensorflow as tf

# The first part: the construction phase
x = tf.Variable(3, name="x")
y = tf.Variable(4, name="y")
f = x*x*y + y + 2

init = tf.global_variables_initializer()

# The second part: the execution phase
with tf.Session() as sess:
    init.run() 
    result = f.eval()
    print(result)

42


# Managing Graphs

Any node you create is automatically added to the default graph:

In [7]:
x1 = tf.Variable(1)
x1.graph is tf.get_default_graph()

True

You can manage multiple independent graphs by creating a new `Graph` and temporarily making it the default graph inside a `with` block, like so:

In [8]:
graph = tf.Graph()
with graph.as_default():
    x2 = tf.Variable(2)

In [9]:
x2.graph is graph

True

In [10]:
x2.graph is tf.get_default_graph

False

# Lifecycle of a Node Value

A variable starts its life when its initializer is run, and it ends when the session is closed.

In [11]:
w = tf.constant(3)
x = w + 2
y = x + 5 # y depends on x, which depends on w:  w -> x -> y
z = x * 3 

with tf.Session() as sess:
    print(y.eval())
    print(z.eval())

10
15


To evaluate y and z efficiently, w/o evaluating w and x twice as in the previous code, you must ask TensorFlow to evaluate both y and z in just one graph run, as shown in the following code:

In [12]:
with tf.Session() as sess:
    y_val, z_val = sess.run([y, z])
    print(y_val)
    print(z_val)

10
15


# Linear Regression with Tensorflow

In [13]:
# It starts by fetching the dataset
import tensorflow as tf
import numpy as np
from sklearn.datasets import fetch_california_housing

# Adding an extra bias input feature (x0 = 1) to all training instances
housing = fetch_california_housing()
m, n = housing.data.shape # (20640, 8)
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data] # (20640, 9)

# Creating two TensorFlow constant nodes, X and y
X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X") # (20640, 9)
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y') # (20640, 1)
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y) # Review: The Normal Equation

with tf.Session() as sess:
    theta_value = theta.eval()

### Review: The Normal Equation

A mathematical equation that gives the result directly

$$ \hat{\theta} = (\mathbf{X^T} \cdot  \mathbf{X})^{-1} \cdot \mathbf{X^T} \cdot y $$

* $\hat{\theta}$ is the value of $\theta$ that minimizes the cost function.
* $y$ is the vector of target values containing $y^{(1)}$ to $y^{(m)}$

### NumPy vs. TensorFlow: The main benefit
TensorFlow will automatically run this on your GPU card

# Implementing Gradient Descent

Using Batch Gradient Descent instead of the Normal Equation, we will do this:

1. by manually computing the gradients
2. by using TensorFlow's autodiff feature
3. by using Tensorflow's out-of-the-box optimizers

**important notes: normalize the input feature vectors first**

## Manually Computing the Gradient

In [14]:
# Normalize the feature vector: scaled_housing_data_plus_bias
from sklearn import preprocessing

scaler =preprocessing.StandardScaler().fit(housing_data_plus_bias)

scaled_housing_data_plus_bias = scaler.transform(housing_data_plus_bias)

In [15]:
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta") # random_uniform() generating a tensor containing random values
y_pred = tf.matmul(X, theta, name="prediction")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    
    best_theta = theta.eval()   
    

Epoch 0 MSE = 6.09568
Epoch 100 MSE = 5.03491
Epoch 200 MSE = 4.96098
Epoch 300 MSE = 4.92778
Epoch 400 MSE = 4.90319
Epoch 500 MSE = 4.88385
Epoch 600 MSE = 4.86848
Epoch 700 MSE = 4.85622
Epoch 800 MSE = 4.84638
Epoch 900 MSE = 4.83847


# Using autodiff

Simply replcae the `gradients = ...` line in the Gradient Descent code in the previous section

In [16]:
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta") # random_uniform() generating a tensor containing random values
y_pred = tf.matmul(X, theta, name="prediction")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
#gradients = 2/m * tf.matmul(tf.transpose(X), error)
gradients = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    
    best_theta = theta.eval()   
    

Epoch 0 MSE = 7.07905
Epoch 100 MSE = 4.98361
Epoch 200 MSE = 4.92228
Epoch 300 MSE = 4.89142
Epoch 400 MSE = 4.86896
Epoch 500 MSE = 4.8524
Epoch 600 MSE = 4.84017
Epoch 700 MSE = 4.8311
Epoch 800 MSE = 4.82436
Epoch 900 MSE = 4.81932


# Using an Optimizer
You can simply replace the preceding `gradients = ...` and `training_op = ...` lines

In [17]:
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta") # random_uniform() generating a tensor containing random values
y_pred = tf.matmul(X, theta, name="prediction")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# gradients = 2/m * tf.matmul(tf.transpose(X), error)
# gradients = tf.gradients(mse, [theta])[0]
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)
training_op = optimizer.minimize(mse)
#training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    
    best_theta = theta.eval()   
    

Epoch 0 MSE = 9.55318
Epoch 100 MSE = 4.88115
Epoch 200 MSE = 4.82529
Epoch 300 MSE = 4.81718
Epoch 400 MSE = 4.8132
Epoch 500 MSE = 4.81045
Epoch 600 MSE = 4.80848
Epoch 700 MSE = 4.80704
Epoch 800 MSE = 4.80601
Epoch 900 MSE = 4.80526


# Feeding Data to the Training Algorithm

To implement Mni-batch GRadient Descent, you need a way to replcae X and y at every iteration with the next mini-batch. The placeholder nodes are used to pass the training data to TensorFlow during training. To create a placeholder node,
1. call the `placeholder()` function.
2. specify the output tensor's data type.
3. optionally, you can specify its shape, if you want to enforce it. `None` means "any size."

In [18]:
A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5

with tf.Session() as sess:
    B_val_1 = B.eval(feed_dict = {A:[[1,2,3]]})
    B_val_2 = B.eval(feed_dict = {A:[[4,5,6], [7,8,9]]})
    
    print(B_val_1)
    print(B_val_2)

[[ 6.  7.  8.]]
[[  9.  10.  11.]
 [ 12.  13.  14.]]


In [19]:
def fetch_batch(epoch, batch_index, batch_size):
    np.random.seed(epoch * batch_index)
    batch_mark = np.random.choice(m, batch_size)
    X_batch = scaled_housing_data_plus_bias[batch_mark]
    y_batch = housing.target.reshape(-1,1)[batch_mark]
    return X_batch, y_batch

In [22]:
n_epochs = 1000
learning_rate = 0.01

# Changing the definition of X and y in the construction phase to make them placeholder nodes

# X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
X = tf.placeholder(tf.float32, shape=(None, n+1), name="X")
# y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name="y")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta") # random_uniform() generating a tensor containing random values
y_pred = tf.matmul(X, theta, name="prediction")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)

training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

# define the batch size and compute the total number of batches 
batch_size = 100
n_batches = int(np.ceil(m / batch_size))

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            # fetch the mini-batches one by one, then provide the value of X and y via the feed_dict parameter
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
    
    best_theta = theta.eval()   
    

# Saving and Restoring Models

Once you have trained your model, why should we save its parameters?
1. you can come back to it ewhenever you want.
2. using it in anotehr program.
3. comparing it to other models, and so on.

Moreover, you can prevent from losing your parameters by saving checkpoints at regular intervals during training. So, how? Just create a `Saver` node at the end of the construction phase (after all variable nodes are created). then, in the execution phase, just call its `save()` method whenever you want to save the model, passing it the session and path of the checkpoint file.

In [23]:
n_epochs = 1000
learning_rate = 0.01

X = tf.placeholder(tf.float32, shape=(None, n+1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta") # random_uniform() generating a tensor containing random values
y_pred = tf.matmul(X, theta, name="prediction")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)

training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

saver = tf.train.Saver()
# saver = tf.train.Saver({"weights": theta})

batch_size = 100
n_batches = int(np.ceil(m / batch_size))

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            _, mse_val = sess.run([training_op, mse], feed_dict={X: X_batch, y: y_batch})
        if epoch % 100 == 0: # checkpoint every 100 epochs
            save_path = saver.save(sess, "./tmp/my_model.ckpt")
            print("Epoch ", epoch, "MSE = ", mse_val)

# restoring your model
# with tf.Sesson() as sess:
    # saver.restore(sess, "./tmp/my_model_final.ckpt") 

    
    best_theta = theta.eval()
    save_path = saver.save(sess, "./tmp/my_model_final.ckpt")    

Epoch  0 MSE =  4.27216
Epoch  100 MSE =  4.65465
Epoch  200 MSE =  5.03598
Epoch  300 MSE =  5.04383
Epoch  400 MSE =  4.51255
Epoch  500 MSE =  5.10193
Epoch  600 MSE =  4.6594
Epoch  700 MSE =  4.49823
Epoch  800 MSE =  4.84197
Epoch  900 MSE =  5.39028


# Visualizing the Graph and Training Curves Using TensorBoard

The TensorBoard nicely display interactive visulizations of some taining stats you feed in your webv browser (e.g., learning curves).
1. writes the graph definition and some training stats - the training error (MSE).
2. need to use a different log directory every time you run your program: include a timestamp in the log directory name

In [None]:
n_epochs = 1000
learning_rate = 0.01

X = tf.placeholder(tf.float32, shape=(None, n+1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta") # random_uniform() generating a tensor containing random values
y_pred = tf.matmul(X, theta, name="prediction")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)

training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

saver = tf.train.Saver()
# saver = tf.train.Saver({"weights": theta})

batch_size = 100
n_batches = int(np.ceil(m / batch_size))

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            if epoch % 100 == 0: # checkpoint every 100 epochs
                save_path = saver.save(sess, "./tmp/my_model.ckpt")
                print("Epoch ", epoch, "MSE = ", mse_val)

# restoring your model
# with tf.Sesson() as sess:
    # saver.restore(sess, "./tmp/my_model_final.ckpt") 

    
    best_theta = theta.eval()
    save_path = saver.save(sess, "./tmp/my_model_final.ckpt")    