# Tensorflow Basics
An opensource software library for numerical computation
A graph of computations to be performed is defined in python, tensorflow takes the graph and runs it using optimized C++ code.
Reference:
https://github.com/ageron/handson-ml

## Creating a Graph and running it in session

In [1]:
# Common imports
import tensorflow as tf

# Define the graph
x = tf.Variable(3, name="x")
y = tf.Variable(4, name="y")
f = x*x*y + y + 2

# Create the session
sess = tf.Session()

# Initialize the variables
sess.run(x.initializer)
sess.run(y.initializer)

# Evaluate
result = sess.run(f)
print(result)

# Close session
sess.close()

42


In [2]:
# Easier invocation with session automatically closed at end

with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()
    
print(result)

42


In [3]:
# Initializer for all variables

init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()
    result = f.eval()
    
print(result)

42


In [4]:
# Can use InteractiveSession in an Jupyter Notebook
sess = tf.InteractiveSession()
init.run()
result = f.eval()
print(result)

42


In [5]:
sess.close()

## Managing Graphs

In [6]:
# Any node created is added to the default graph
x1 = tf.Variable(1)
x1.graph is tf.get_default_graph()

True

In [7]:
# Multiple graphs can be managed, one can temporarily set a graph as default graph inside a with block
graph = tf.Graph()
with graph.as_default():
    x2 = tf.Variable(2)

x2.graph is graph

True

In [8]:
x2.graph is tf.get_default_graph()

False

## In Jupyter notebook, the kernel can be restarted to reset the default graph, or use tf.reset_default_graph() to reset.

In [9]:
# Nodes and dependant nodes evaluated twice
tf.reset_default_graph()

w = tf.constant(3)
x = w + 2
y = x + 2
z = x + 3

with tf.Session() as sess:
    print(y.eval())
    print(z.eval())

7
8


In [10]:
# Single evaluation
with tf.Session() as sess:
    y_val, z_val = sess.run([y, z])
    
print(y_val, z_val)

7 8


## Tensorflow Operations

### Linear regression with tensorflow

In [11]:
import numpy as np
from sklearn.datasets import fetch_california_housing

tf.reset_default_graph()

In [12]:
housing = fetch_california_housing()

In [13]:
m, n = housing.data.shape

In [14]:
# Prepare data
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]
housing_data_plus_bias.shape

(20640, 9)

In [15]:
# Define graph
X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
Y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="Y")
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), Y)

In [16]:
# Evaluate
with tf.Session() as sess:
    theta_val = theta.eval()
theta_val

array([[-3.7465141e+01],
       [ 4.3573415e-01],
       [ 9.3382923e-03],
       [-1.0662201e-01],
       [ 6.4410698e-01],
       [-4.2513184e-06],
       [-3.7732250e-03],
       [-4.2664889e-01],
       [-4.4051403e-01]], dtype=float32)

### Gradient Descent with tensorflow

### Batch Gradient Descent

In [17]:
# Scaling data
from sklearn.preprocessing import StandardScaler

tf.reset_default_graph()

# Prepare data
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]
scaled_housing_data_plus_bias.shape

(20640, 9)

#### Manually computed gradients

In [18]:
n_epochs = 1000
learning_rate = 0.01

# Define graph

x = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="x")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")

# random initialization
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(x, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")


**Equation 1: Gradient vector of the cost function**

$
\nabla_{\mathbf{\theta}}\, \text{MSE}(\mathbf{\theta}) =
\begin{pmatrix}
 \frac{\partial}{\partial \theta_0} \text{MSE}(\mathbf{\theta}) \\
 \frac{\partial}{\partial \theta_1} \text{MSE}(\mathbf{\theta}) \\
 \vdots \\
 \frac{\partial}{\partial \theta_n} \text{MSE}(\mathbf{\theta})
\end{pmatrix}
 = \dfrac{2}{m} \mathbf{X}^T \cdot (\mathbf{X} \cdot \mathbf{\theta} - \mathbf{y})
$

In [19]:
# Compute gradients using equation 1
#gradients = 2/m * tf.matmul(tf.transpose(x), error)

# Using autodiff
gradients = tf.gradients(mse, [theta])[0]

**Equation 2: Gradient Descent step**

$
\mathbf{\theta}^{(\text{next step})} = \mathbf{\theta} - \eta \nabla_{\mathbf{\theta}}\, \text{MSE}(\mathbf{\theta})
$

In [20]:
# Compute the gradient descent step using equation 2
training_op = tf.assign(theta, theta - learning_rate * gradients)

In [21]:
# Initialize
init = tf.global_variables_initializer()

# Evaluate
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch%100 == 0:
            print("Epoch", epoch, "MSE", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()

Epoch 0 MSE 7.7836943
Epoch 100 MSE 0.67482936
Epoch 200 MSE 0.56474763
Epoch 300 MSE 0.55462474
Epoch 400 MSE 0.54887855
Epoch 500 MSE 0.54436547
Epoch 600 MSE 0.5407277
Epoch 700 MSE 0.5377784
Epoch 800 MSE 0.5353804
Epoch 900 MSE 0.533426


In [22]:
best_theta

array([[ 2.0685525 ],
       [ 0.911043  ],
       [ 0.14275357],
       [-0.40302444],
       [ 0.4122085 ],
       [ 0.00317108],
       [-0.04309262],
       [-0.6484518 ],
       [-0.62779343]], dtype=float32)

#### Using gradient descent optimizer

In [26]:
tf.reset_default_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
Y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="Y")
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - Y
mse = tf.reduce_mean(tf.square(error), name="mse")

In [27]:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

In [28]:
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch%100 == 0:
            print("Epoch", epoch, "MSE", mse.eval())
        sess.run(training_op)
    
    best_theta = theta.eval()
    
print(best_theta)

Epoch 0 MSE 2.7544272
Epoch 100 MSE 0.63222194
Epoch 200 MSE 0.5727796
Epoch 300 MSE 0.5585005
Epoch 400 MSE 0.54906934
Epoch 500 MSE 0.5422877
Epoch 600 MSE 0.5373788
Epoch 700 MSE 0.5338219
Epoch 800 MSE 0.5312427
Epoch 900 MSE 0.52937055
[[ 2.06855249e+00]
 [ 7.74078071e-01]
 [ 1.31192386e-01]
 [-1.17845066e-01]
 [ 1.64778143e-01]
 [ 7.44078017e-04]
 [-3.91945094e-02]
 [-8.61356676e-01]
 [-8.23479772e-01]]


#### Using mini-batch gradient descent

In [31]:
n_epochs = 10
learning_rate = 0.01

tf.reset_default_graph()

X = tf.placeholder(tf.float32, shape=(None, n+1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

In [36]:
init = tf.global_variables_initializer()

batch_size = 100
n_batches = int(np.ceil(m/batch_size))

def fetch_batch(epoch, batch_index, batch_size):
    np.random.seed(epoch * n_batches + batch_size)
    indices = np.random.randint(m, size=batch_size)
    X_batch = scaled_housing_data_plus_bias[indices]
    y_batch = housing.target.reshape(-1, 1)[indices]
    return X_batch, y_batch

In [37]:
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            sess.run(training_op, feed_dict={X:X_batch, y:y_batch})
        
    best_theta = theta.eval()
    
best_theta

array([[ 2.1181471 ],
       [ 0.88477826],
       [ 0.21145836],
       [-0.12849197],
       [ 0.42868158],
       [-0.10997759],
       [-0.71003485],
       [-0.9115423 ],
       [-0.8364051 ]], dtype=float32)

### Saving and Restoring models

In [40]:
# Saving

tf.reset_default_graph()

n_epoch = 1000
learning_rate = 0.01

# construct graph nodes
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()
# create a saver node
saver = tf.train.Saver()

# evaluate
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epoch):
        if epoch%100 == 0:
            print("Epoch", epoch, "MSE", mse.eval())
            save_path = saver.save(sess, "./my_model.ckpt")
        sess.run(training_op)
    
    best_theta = theta.eval()
    save_path = saver.save(sess, "./my_final_model.ckpt")

best_theta

Epoch 0 MSE 9.336487
Epoch 100 MSE 0.5290166
Epoch 200 MSE 0.52492553
Epoch 300 MSE 0.52440155
Epoch 400 MSE 0.5243317
Epoch 500 MSE 0.524322
Epoch 600 MSE 0.52432084
Epoch 700 MSE 0.52432114
Epoch 800 MSE 0.524321
Epoch 900 MSE 0.5243207


array([[ 2.0685577 ],
       [ 0.82962817],
       [ 0.11875328],
       [-0.2655437 ],
       [ 0.30571023],
       [-0.00450254],
       [-0.0393266 ],
       [-0.89986545],
       [-0.87052184]], dtype=float32)

In [41]:
# Restoring
with tf.Session() as sess:
    saver.restore(sess, "./my_final_model.ckpt")
    theta_restored = theta.eval()

INFO:tensorflow:Restoring parameters from ./my_final_model.ckpt


In [42]:
np.allclose(best_theta, theta_restored)

True

### Visualizing the graph and training curves

In [61]:
# Visualizing the MSE

from datetime import datetime

tf.reset_default_graph()

# log dir for tensorboard
current = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
log_dir = "{}/run-{}/".format(root_logdir, current)

n_epoch = 50
learning_rate = 0.01
batch_size = 100
n_batches = int(np.ceil(m/batch_size))

# construct graph nodes
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

# create a node to evaluate MSE and write it to a tensorboard binary - summary
mse_summary = tf.summary.scalar("MSE", mse)
# write the summaries to the log dir
file_writer = tf.summary.FileWriter(log_dir, tf.get_default_graph())

In [62]:
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epoch):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            if batch_index % 10 == 0:
                summary_str = mse_summary.eval(feed_dict={X: X_batch, y:y_batch})
                step = epoch * n_batches + batch_index
                file_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict={X:X_batch, y:y_batch})
            
    best_theta = theta.eval()

In [63]:
file_writer.close()

In [64]:
best_theta

array([[ 1.9848642 ],
       [ 0.7630734 ],
       [ 0.09626528],
       [-0.18267234],
       [ 0.484666  ],
       [ 0.08058567],
       [-0.54538715],
       [-0.87415826],
       [-0.8057145 ]], dtype=float32)

### Named Scopes

In [70]:
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [71]:
# reset the graph
reset_graph()

# set the log folder path for tensorboard
# format - tf_logs/run-{YYYYMMDDHHMMSS}
current = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
log_dir = "{}/run-{}/".format(root_logdir, current)

# parameters
n_epochs = 10
batch_size = 100
learning_rate = 0.01
n_batches = int(np.ceil(m/batch_size))

# graph construction
X = tf.placeholder(dtype=tf.float32, shape=(None, n+1), name="X")
y = tf.placeholder(dtype=tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")

with tf.name_scope("loss") as scope:
    error = y_pred - y
    mse = tf.reduce_mean(tf.square(error), name="mse")
    
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

# initialize
init = tf.global_variables_initializer()

# create node to evaluate MSE and write it to a tensorboard binary - summary
mse_summary = tf.summary.scalar("MSE", mse)
# write summary to log dir
file_writer = tf.summary.FileWriter(log_dir, tf.get_default_graph())

# graph execution
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epoch):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            if batch_index%10 == 0:
                summary_str = mse_summary.eval(feed_dict={X:X_batch, y:y_batch})
                step = epoch*n_batches+batch_index
                file_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict={X:X_batch, y:y_batch})
        
    best_theta = theta.eval()
    
file_writer.flush()
file_writer.close()
print(best_theta)

[[ 1.9848869 ]
 [ 0.76331186]
 [ 0.0962311 ]
 [-0.18335664]
 [ 0.48558667]
 [ 0.08064776]
 [-0.54495084]
 [-0.87403667]
 [-0.80562687]]


### Modularity

In [73]:
# Repetitive code for RelUs
reset_graph()

n_features = 3

X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")

w1 = tf.Variable(tf.random_normal((n_features, 1)), name="weights1")
w2 = tf.Variable(tf.random_normal((n_features, 1)), name="weights2")
b1 = tf.Variable(0.0, name="bias1")
b2 = tf.Variable(0.0, name="bias2")

z1 = tf.add(tf.matmul(X, w1), b1, name="z1")
z2 = tf.add(tf.matmul(X, w2), b2, name="z2")

relu1 = tf.maximum(z1, 0., name="relu1")
relu2 = tf.maximum(z2, 0., name="relu2")

output = tf.add(relu1, relu2, name="output")

print(output)

Tensor("output:0", shape=(?, 1), dtype=float32)


In [74]:
# Better code to create a ReLUs

reset_graph()

def relu(X):
    w_shape = (int(X.get_shape()[1]), 1)
    w = tf.Variable(tf.random_normal(w_shape), name="weights")
    b = tf.Variable(0.0, name="bias")
    z = tf.add(tf.matmul(X, w), b, name="z")
    return tf.maximum(z, 0., name="relu")

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = [relu(X) for _ in range(5)]
output = tf.add_n(relus, name="output")
file_writer = tf.summary.FileWriter("logs/relu", tf.get_default_graph())

In [76]:
# Improved with named scopes

reset_graph()

def relu(X):
    with tf.name_scope("relu"):
        w_shape = (int(X.get_shape()[1]), 1)
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X, w), b, name="z")
        return tf.maximum(z, 0., name="max")

In [77]:
n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = [relu(X) for _ in range(5)]
output = tf.add_n(relus, name="output")

file_writer = tf.summary.FileWriter("logs/relu2", tf.get_default_graph())
file_writer.close()

### Sharing Variables

In [79]:
# 1 - Creating a shared variable outside the relu function and passing it as a parameter

reset_graph()

def relu(X, threshold):
    with tf.name_scope("relu"):
        w_shape = (int(X.get_shape()[1]), 1)
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X, w), b, name="z")
        return tf.maximum(z, threshold, name="max")
    
threshold = tf.Variable(0.0, name="threshold")
n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = [relu(X, threshold) for _ in range(5)]
output = tf.add_n(relus, name="output")
file_writer = tf.summary.FileWriter("logs/relu3", tf.get_default_graph())
file_writer.close()

In [80]:
# 2 - Set the shared variable as a attribute of the relu() function when first called

reset_graph()

def relu(X):
    with tf.name_scope("relu"):
        if not hasattr(relu, "threshold"):
            relu.threshold = tf.Variable(0.0, name="threshold")
        w_shape = (int(X.get_shape()[1]), 1)
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X, w), b, name="z")
        return tf.maximum(z, relu.threshold, name="max")

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = [relu(X) for _ in range(5)]
output = tf.add_n(relus, name="outputs")

file_writer = tf.summary.FileWriter("logs/relu4", tf.get_default_graph())
file_writer.close()

In [82]:
# 3 - Create a shared variable or reuse if already created using get_variable function
# And use attribute of variable_scope to control the behaviour

reset_graph()

with tf.variable_scope("relu"):
    threshold = tf.get_variable("threshold", shape=(), initializer=tf.constant_initializer(0.0))


In [84]:
# To reuse explictly set attribute of variable_scope

with tf.variable_scope("relu", reuse=True):
    threshold = tf.get_variable("threshold")

In [85]:
# Set the resue attribute using the scope's reuse_variables function

with tf.variable_scope("relu") as scope:
    scope.reuse_variables()
    threshold = tf.get_variable("threshold")

In [87]:
reset_graph()

def relu(X):
    # Resue shared threshold variable
    with tf.variable_scope("relu", reuse=True):
        threshold = tf.get_variable("threshold")
        w_shape = (int(X.get_shape()[1]), 1)
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X, w), b, name="z")
        return tf.maximum(z, threshold, name="max")
    
n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
# create a shared threshold variable
with tf.variable_scope("relu"):
    threshold = tf.get_variable("threshold", shape=(), initializer=tf.constant_initializer(0.0))

relus = [relu(X) for _ in range(5)]
output = tf.add_n(relus, name="output")
file_writer = tf.summary.FileWriter("logs/relu5", tf.get_default_graph())
file_writer.close()

In [88]:
# Create the shared variable threshold inside the relu function

reset_graph()

def relu(X):
    # create the shared threshold variable
    with tf.variable_scope("relu"):
        threshold = tf.get_variable("threshold", shape=(), initializer=tf.constant_initializer(0.0))
        w_shape = (int(X.get_shape()[1]), 1)
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X, w), b, name="z")
        return tf.maximum(z, threshold, name="max")

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
with tf.variable_scope("", default_name="") as scope:
    # create shared variable
    first_relu = relu(X)
    # resue shared variable
    scope.reuse_variables()
    relus = [first_relu] + [relu(X) for _ in range(4)]
output = tf.add_n(relus, name="output")
file_writer = tf.summary.FileWriter("logs/relu6", tf.get_default_graph())
file_writer.close()
    