In [1]:
import tensorflow as tf

In [2]:
x = tf.Variable(3, name="x")
y = tf.Variable(4, name="y")
f = x*x*y + y + 2

Instructions for updating:
Colocations handled automatically by placer.


In [3]:
sess = tf.Session()
sess.run(x.initializer)
sess.run(y.initializer)
result = sess.run(f)
print(result)

42


In [4]:
sess.close()

## A more efficient way to having to repeat sess.run() is to do the following

In [5]:
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()

In [6]:
init = tf.global_variables_initializer() #prepare an init node

In [7]:
with tf.Session() as sess:
    init.run() # initialize all variables
    result = f.eval()

In [8]:
sess = tf.InteractiveSession()
init.run()
result = f.eval()
print(result)

42


In [9]:
x1 = tf.Variable(1)
x1.graph is tf.get_default_graph()

True

#### Any time a node is created it gets added to the graph. In order to combat and manage this it is possible to create a new graph and temporarily make it the default graph within a block

In [10]:
graph = tf.Graph()
with graph.as_default():
    x2 = tf.Variable(2)
    
x2.graph is graph

True

In [11]:
x2.graph is tf.get_default_graph()

False

### While troubleshooting or coding our graphs, we may run into a case with many overlapping nodes and issues. 

#### In order to correct this we may take advantage of tf.reset_default_graph()

In [12]:
tf.reset_default_graph()

### In the following code below, x and w will both be evaluated twice for y, and z.

In [13]:
w = tf.constant(3)
x = w + 1
y = x + 6
z = x * 4

with tf.Session() as sess:
    print(y.eval()) 
    print(z.eval())

10
16


## Say if we only want to evaluate x and w once, we may do so in just one graph. 

In [14]:
with tf.Session() as sess:
    y_val, z_val = sess.run([y, z])
    print(y.eval()) 
    print(z.eval())

10
16


### Utilizing tensorflow for more than a scalar vector....
#### Let's do some computations

In [15]:
import numpy as np
from sklearn.datasets import fetch_california_housing

In [16]:
housing = fetch_california_housing()
m, n = housing.data.shape
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

In [17]:
X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

with tf.Session() as sess:
    theta_value = theta.eval()
    print(theta_value)

[[-3.6959320e+01]
 [ 4.3698898e-01]
 [ 9.4245886e-03]
 [-1.0791138e-01]
 [ 6.4842808e-01]
 [-3.9986235e-06]
 [-3.7866351e-03]
 [-4.2142656e-01]
 [-4.3467718e-01]]


## Above the normal equation was utilized in order to calculate theta.

## Instead, let's implement gradient descent to calculate theta for us.

### Recall for gradient descent that data normalization needs to be undertaken before we work with the data. Otherwise training will take much more time.

In [18]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(housing_data_plus_bias)
scaled_housing_data_plus_bias = scaler.transform(housing_data_plus_bias)

In [19]:
n_epochs = 5000
learning_rate = 0.01

In [20]:
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()

Epoch 0 MSE = 10.156375
Epoch 100 MSE = 5.0488753
Epoch 200 MSE = 4.9619884
Epoch 300 MSE = 4.9203467
Epoch 400 MSE = 4.890257
Epoch 500 MSE = 4.868146
Epoch 600 MSE = 4.8518395
Epoch 700 MSE = 4.839782
Epoch 800 MSE = 4.830838
Epoch 900 MSE = 4.8241796
Epoch 1000 MSE = 4.8192058
Epoch 1100 MSE = 4.8154745
Epoch 1200 MSE = 4.8126645
Epoch 1300 MSE = 4.810539
Epoch 1400 MSE = 4.8089213
Epoch 1500 MSE = 4.8076863
Epoch 1600 MSE = 4.8067384
Epoch 1700 MSE = 4.8060064
Epoch 1800 MSE = 4.8054385
Epoch 1900 MSE = 4.8049965
Epoch 2000 MSE = 4.804648
Epoch 2100 MSE = 4.8043747
Epoch 2200 MSE = 4.804159
Epoch 2300 MSE = 4.803987
Epoch 2400 MSE = 4.8038487
Epoch 2500 MSE = 4.803739
Epoch 2600 MSE = 4.8036494
Epoch 2700 MSE = 4.8035784
Epoch 2800 MSE = 4.8035197
Epoch 2900 MSE = 4.8034725
Epoch 3000 MSE = 4.8034344
Epoch 3100 MSE = 4.8034024
Epoch 3200 MSE = 4.803376
Epoch 3300 MSE = 4.8033557
Epoch 3400 MSE = 4.8033376
Epoch 3500 MSE = 4.803323
Epoch 3600 MSE = 4.803312
Epoch 3700 MSE = 4.803302

### A good solution is found well before many iterations are ran.

## Unfortunately, manually inputting gradients for every line in a tensorflow calculation is extremely tedious. 
### Instead we may take advantage of the autodiff approach, which utilizes a reverse-mode autodiff, which AUTOMATICALLY and EFFICIENTLY computes gradients. 

# Sounds like a dream for an engineer who loves efficiency :)

In [21]:
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()

Epoch 0 MSE = 12.4902115
Epoch 100 MSE = 4.960502
Epoch 200 MSE = 4.8838835
Epoch 300 MSE = 4.863715
Epoch 400 MSE = 4.84939
Epoch 500 MSE = 4.838655
Epoch 600 MSE = 4.830559
Epoch 700 MSE = 4.824424
Epoch 800 MSE = 4.819753
Epoch 900 MSE = 4.8161793
Epoch 1000 MSE = 4.81343
Epoch 1100 MSE = 4.8113055
Epoch 1200 MSE = 4.8096538
Epoch 1300 MSE = 4.8083634
Epoch 1400 MSE = 4.8073516
Epoch 1500 MSE = 4.8065515
Epoch 1600 MSE = 4.8059187
Epoch 1700 MSE = 4.805414
Epoch 1800 MSE = 4.80501
Epoch 1900 MSE = 4.804685
Epoch 2000 MSE = 4.804425
Epoch 2100 MSE = 4.804213
Epoch 2200 MSE = 4.804041
Epoch 2300 MSE = 4.8039007
Epoch 2400 MSE = 4.803787
Epoch 2500 MSE = 4.8036942
Epoch 2600 MSE = 4.803617
Epoch 2700 MSE = 4.803554
Epoch 2800 MSE = 4.803503
Epoch 2900 MSE = 4.80346
Epoch 3000 MSE = 4.803425
Epoch 3100 MSE = 4.8033953
Epoch 3200 MSE = 4.803372
Epoch 3300 MSE = 4.803352
Epoch 3400 MSE = 4.803335
Epoch 3500 MSE = 4.803321
Epoch 3600 MSE = 4.80331
Epoch 3700 MSE = 4.8033
Epoch 3800 MSE = 4

## Simply replace the manual calculation of gradients line for the line

### gradients = tf.gradients(mse, [theta])[0]

# To make things EVEN BETTER tensorflow has built in optimizers, even one for gradient descent. Yep. Making things that much better.

## we may go and make adjustments to the prior code with a simple optimizer replace training_op

In [22]:
import time
start = time.time()
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = tf.gradients(mse, [theta])[0]
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
end = time.time()
print(end - start)

Epoch 0 MSE = 8.316429
Epoch 100 MSE = 4.934683
Epoch 200 MSE = 4.868843
Epoch 300 MSE = 4.8524013
Epoch 400 MSE = 4.8406763
Epoch 500 MSE = 4.831898
Epoch 600 MSE = 4.825293
Epoch 700 MSE = 4.820297
Epoch 800 MSE = 4.816504
Epoch 900 MSE = 4.813606
Epoch 1000 MSE = 4.811385
Epoch 1100 MSE = 4.8096714
Epoch 1200 MSE = 4.8083444
Epoch 1300 MSE = 4.8073096
Epoch 1400 MSE = 4.8065
Epoch 1500 MSE = 4.805862
Epoch 1600 MSE = 4.8053565
Epoch 1700 MSE = 4.804956
Epoch 1800 MSE = 4.804635
Epoch 1900 MSE = 4.804379
Epoch 2000 MSE = 4.804173
Epoch 2100 MSE = 4.8040056
Epoch 2200 MSE = 4.8038707
Epoch 2300 MSE = 4.80376
Epoch 2400 MSE = 4.8036704
Epoch 2500 MSE = 4.8035975
Epoch 2600 MSE = 4.803538
Epoch 2700 MSE = 4.803489
Epoch 2800 MSE = 4.803449
Epoch 2900 MSE = 4.803415
Epoch 3000 MSE = 4.803387
Epoch 3100 MSE = 4.8033648
Epoch 3200 MSE = 4.803345
Epoch 3300 MSE = 4.80333
Epoch 3400 MSE = 4.803316
Epoch 3500 MSE = 4.8033066
Epoch 3600 MSE = 4.8032975
Epoch 3700 MSE = 4.803291
Epoch 3800 MSE 

### Simple, with just a few lines of code we utilize a built in optimizer. No big deal

### other optimizers may be used, for example, a momentum optimizer
# please note that often times momentum optimizers can be much faster for convergence than gradient descent

In [23]:
start = time.time()
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = tf.gradients(mse, [theta])[0]
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.75)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
end = time.time()
print(end - start)
print("Best theta:")
print(best_theta)

Epoch 0 MSE = 8.125369
Epoch 100 MSE = 4.8794255
Epoch 200 MSE = 4.8363953
Epoch 300 MSE = 4.8182316
Epoch 400 MSE = 4.810185
Epoch 500 MSE = 4.8065057
Epoch 600 MSE = 4.804792
Epoch 700 MSE = 4.803984
Epoch 800 MSE = 4.8036017
Epoch 900 MSE = 4.8034196
Epoch 1000 MSE = 4.803333
Epoch 1100 MSE = 4.8032913
Epoch 1200 MSE = 4.803272
Epoch 1300 MSE = 4.8032627
Epoch 1400 MSE = 4.803258
Epoch 1500 MSE = 4.803256
Epoch 1600 MSE = 4.8032556
Epoch 1700 MSE = 4.8032546
Epoch 1800 MSE = 4.8032537
Epoch 1900 MSE = 4.8032537
Epoch 2000 MSE = 4.8032546
Epoch 2100 MSE = 4.803254
Epoch 2200 MSE = 4.8032546
Epoch 2300 MSE = 4.8032537
Epoch 2400 MSE = 4.8032537
Epoch 2500 MSE = 4.803254
Epoch 2600 MSE = 4.8032546
Epoch 2700 MSE = 4.803254
Epoch 2800 MSE = 4.8032537
Epoch 2900 MSE = 4.803254
Epoch 3000 MSE = 4.8032537
Epoch 3100 MSE = 4.8032537
Epoch 3200 MSE = 4.8032537
Epoch 3300 MSE = 4.8032537
Epoch 3400 MSE = 4.8032537
Epoch 3500 MSE = 4.8032537
Epoch 3600 MSE = 4.8032537
Epoch 3700 MSE = 4.803253

# you betcha, more than 25%  faster for convergence using a momentum optimizer

# Implementing batch gradient descent with tensorflow
## to implement Mini-batch GD, we must replace X and y at every iteration.
### in order to do so, placeholder nodes can be utilized. They just output the data we tell them to output at runtime.

## in practice, they are typically used to pass training data to TensorFlow during training a value must be specified at runtime, otherwise an exception occurs

### specifying ,None, for a dimension means that the placeholder node can be any size

# Example for Placeholder B = A * 5
# C = A + 5

In [28]:
A = tf.placeholder(tf.float32, shape=(None, 3))
B = A * 5
C = A + 5
with tf.Session() as sess:
    B_val_1 = B.eval(feed_dict={A: [[1, 2, 3]]})
    B_val_2 = B.eval(feed_dict={A: [[4, 5, 6], [7, 8, 9]]})
    C_val_1 = C.eval(feed_dict={A: [[1, 2, 3]]})
    C_val_2 = C.eval(feed_dict={A: [[4, 5, 6], [7, 8, 9]]})


print(B_val_1)

print(B_val_2)

print(C_val_1)

print(C_val_2)


[[ 5. 10. 15.]]
[[20. 25. 30.]
 [35. 40. 45.]]
[[6. 7. 8.]]
[[ 9. 10. 11.]
 [12. 13. 14.]]


## For mini-batch gradient descent we need to first make X and y placeholder nodes

In [29]:
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

batch_size = 5000
n_batches = int(np.ceil(m / batch_size))

#### Note: recall ceil is for rounding up integers.
## For the execution, feed in mini-batches one by one, provide the value of X and y via feed_dict parameter

In [30]:
start = time.time()
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = tf.gradients(mse, [theta])[0]
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.75)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

batch_size = 5000
n_batches = int(np.ceil(m / batch_size))

def fetch_batch(epoch, batch_index, batch_size):
    np.random.seed(epoch * n_batches + batch_index)  
    indices = np.random.randint(m, size=batch_size)  
    X_batch = scaled_housing_data_plus_bias[indices] 
    y_batch = housing.target.reshape(-1, 1)[indices] 
    return X_batch, y_batch
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0: #checkpoint every 100 epochs
            for batch_index in range(n_batches):
                X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
                save_path = saver.save(sess, "/tmp/my_model.ckpt")
                sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            
    best_theta = theta.eval()
    save_path = saver.save(, "/tmp/my_model_final.ckpt")
end = time.time()
print(end - start)
print("Best theta:")
print(best_theta)

16.119039058685303
Best theta:
[[-0.72113657]
 [ 0.74445766]
 [ 0.14158066]
 [-0.01318893]
 [ 0.07011674]
 [ 0.00310853]
 [-0.04627834]
 [-0.85392654]
 [-0.8114292 ]]


# Utilizing TensorBoard

In [62]:
def strip_consts(graph_def, max_const_size=32):
    """Strip large constant values from graph_def."""
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add() 
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                tensor.tensor_content = "<stripped %d bytes>"%size
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Visualize TensorFlow graph."""
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    strip_def = strip_consts(graph_def, max_const_size=max_const_size)
    code = """
        <script src="//cdnjs.cloudflare.com/ajax/libs/polymer/0.3.3/platform.js"></script>
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))

    iframe = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """.format(code.replace('"', '&quot;'))
    display(HTML(iframe))

In [63]:
from datetime import datetime

now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

In [64]:
n_epochs = 1000
learning_rate = 0.01

X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

In [72]:
mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [73]:
n_epochs = 10
batch_size = 50
n_batches = int(np.ceil(m / batch_size))

In [74]:
with tf.Session() as sess:                                                        
    sess.run(init)                                                                

    for epoch in range(n_epochs):                                                 
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            if batch_index % 5 == 0:
                summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})
                step = epoch * n_batches + batch_index
                file_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

    best_theta = theta.eval()  

In [75]:
file_writer.close()

In [76]:
best_theta

array([[ 0.9045429 ],
       [ 0.8222856 ],
       [ 0.07364567],
       [-0.31937072],
       [ 0.19574822],
       [-0.00453428],
       [ 0.01396737],
       [-0.935621  ],
       [-0.8482822 ]], dtype=float32)