# Basic Concepts and API

All TF code follows this process:
1. Create a **computation graph** that defines your computational structure
2. Create a TF session
3. Run the computation graph in the session

In [1]:
import tensorflow as tf

In [2]:
# Define variables and operations in the graph

x = tf.Variable(3, name="x") # declare a symbolic name, x
y = tf.Variable(4, name="y")
g = x*x*y
h = y**3
print(type(g))
print(type(h))
f = g + h
print(type(f))

<class 'tensorflow.python.framework.ops.Tensor'>
<class 'tensorflow.python.framework.ops.Tensor'>
<class 'tensorflow.python.framework.ops.Tensor'>


The *type* of each computation is a TF **op**.

In [3]:
# Build a session and run. Using the "with" context block automatically closes the session.
with tf.Session() as tf_sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()

In [4]:
result

100

An alternative to initializing variables individually is to call the <code>global_variables_initializer</code> function.

In [5]:
init = tf.global_variables_initializer() # Creates an init node

with tf.Session() as tf_sess:
    init.run()
    result = f.eval()
    print(result)

100


## Graphs

We can build graphs and then merge them together programmatically. Otherwise, it is assumed that declared computations are applied to the **same graph**.

In [6]:
x1 = tf.Variable(1)
# check where this x1 node lives:
x1.graph is tf.get_default_graph()

True

In [7]:
# Now, make another graph and add a new variable to it:
new_graph = tf.Graph()
with new_graph.as_default():
    x2 = tf.Variable(2)
    
print(x2.graph is tf.get_default_graph())
print(x2.graph is new_graph)

False
True


## More on Nodes

TF node evaluation determines the set of nodes that the node depends on and evaluates them. **All node values (except variables) are dropped between graph runs!**

Varialbes start their life when initialized and end when the session closes.

In [8]:
tf.reset_default_graph()

w = tf.constant(9)
x = w * 7
y = x + 2
z = x**2

with tf.Session() as tf_sess:
    print(y.eval())
    print(z.eval())

65
3969


The above code is not efficient, as the computation of x and w will happen twice! Instead, have y and evaluate in a single graph run.

In [9]:
with tf.Session() as tf_sess:
    y_val, z_val = tf_sess.run([y,z])
    print(y_val)
    print(z_val)

65
3969


## Operations

TF "ops" can take *any* number of inputs and produce *any* number of outputs. Sources are constants and Variables. The inputs and outputs of operations are always **tensors** - multi-dimensional arrays. In TF, tensors are numpy <code>ndarray</code>s.

The following example performs linear regression using the closed form Normal Equation embedded as a TF op.

In [10]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

housing_dataset = fetch_california_housing()
m,n = housing_dataset.data.shape
print("Data shape: " + str(m) + " instances, " + str(n) + " features")

X_raw = housing_dataset.data
y_raw = housing_dataset.target

# Split up the data
X_train, X_test, y_train, y_test = train_test_split(X_raw, y_raw, test_size=0.2)
print("Training size: " + str(X_train.shape[0]) + "; Test size: " + str(X_test.shape[0]))

# Scale the data sets
housing_scaler = StandardScaler()
X_train_scaled = housing_scaler.fit_transform(X_train)
X_test_scaled = housing_scaler.transform(X_test)

Data shape: 20640 instances, 8 features
Training size: 16512; Test size: 4128


In [11]:
# Add a bias of 1 to model the linear regression.
X_train_biased = np.c_[np.ones((X_train_scaled.shape[0],1)), X_train_scaled]
X_test_biased = np.c_[np.ones((X_test_scaled.shape[0],1)), X_test_scaled]
print("Biased train data shape: " + str(X_train_biased.shape[0]) + " instances, " + str(X_train_biased.shape[1]) + " features")
print("Biased test data shape: " + str(X_test_biased.shape[0]) + " instances, " + str(X_test_biased.shape[1]) + " features")

Biased train data shape: 16512 instances, 9 features
Biased test data shape: 4128 instances, 9 features


In [12]:
X = tf.constant(X_train_biased, dtype=tf.float32, name="X")
print("Target array shape: " + str(X_train_biased.shape))
# Explicitly turn into an m x 1 vector
y = tf.constant(y_train.reshape(-1,1), dtype=tf.float32, name="y")
print("...as TF constant: " + str(y_train.shape))
XT = tf.transpose(X)

Target array shape: (16512, 9)
...as TF constant: (16512,)


Implement the Normal Equation:
$\theta^{\star} = (X\cdot X^T)^{-1}\cdot{X^T}\cdot{y}$

In [13]:
inv = tf.matrix_inverse( tf.matmul(XT, X) )
theta = tf.matmul( tf.matmul(inv, XT), y )

In [14]:
# Vroom vroom!
with tf.Session() as tf_sess:
    theta_val = theta.eval()

In [15]:
print("Performed a linear regression over the data set:")
print(str(theta_val) + "\n " + str(theta_val.shape))

Performed a linear regression over the data set:
[[ 2.0638726 ]
 [ 0.8261018 ]
 [ 0.11529301]
 [-0.25263292]
 [ 0.2780502 ]
 [-0.00636555]
 [-0.02895787]
 [-0.8981792 ]
 [-0.87140805]]
 (9, 1)


## Manual Gradient Descent via TF

I will re-use the scaled data from above and implement gradient descent manually rather than use the normal equation solution.

In [16]:
# For grins, make a new graph for this implementation.
gd_graph = tf.Graph()
m = X_train_biased.shape[0]
n = X_train_biased.shape[1]

n_epochs = 2000
alpha = 0.01 # learning rate

with gd_graph.as_default():
    X = tf.constant(X_train_biased, dtype=tf.float32, name="X")
    y = tf.constant(y_train.reshape(-1,1), dtype=tf.float32, name="y")
    # Initialize theta variables with uniform random values
    theta = tf.Variable( tf.random_uniform([n, 1], -1.0, 1.0), name="theta" )
    # Compute the predictions and error
    y_pred = tf.matmul( X, theta, name="predictions" )
    error = y_pred - y
    # Call on TF's mse function
    mse = tf.reduce_mean( tf.square(error), name="mse" )
    # Gradient calculations
    dJdtheta = (2.0/m) * tf.matmul( tf.transpose(X), error )
    # Training/learning op. assign() computes a new value and assigns it to a TF variable
    train_op = tf.assign( theta, theta - alpha*dJdtheta )
    
    init_op = tf.global_variables_initializer()

In [17]:
with tf.Session( graph=gd_graph ) as sess:
    sess.run(init_op)
    
    for i in range(n_epochs):
        
        if i % 100 == 0:
            print("Epoch ", i, "MSE = ", mse.eval())
        sess.run(train_op)
    
    # At the end, print the current thetas
    print(theta.eval())


Epoch  0 MSE =  7.3624883
Epoch  100 MSE =  0.81129843
Epoch  200 MSE =  0.696744
Epoch  300 MSE =  0.64358246
Epoch  400 MSE =  0.6069529
Epoch  500 MSE =  0.5814039
Epoch  600 MSE =  0.5635774
Epoch  700 MSE =  0.55113834
Epoch  800 MSE =  0.5424594
Epoch  900 MSE =  0.53640234
Epoch  1000 MSE =  0.5321771
Epoch  1100 MSE =  0.5292278
Epoch  1200 MSE =  0.5271701
Epoch  1300 MSE =  0.52573436
Epoch  1400 MSE =  0.52473235
Epoch  1500 MSE =  0.52403355
Epoch  1600 MSE =  0.52354574
Epoch  1700 MSE =  0.52320516
Epoch  1800 MSE =  0.5229676
Epoch  1900 MSE =  0.52280194
[[ 2.0638661 ]
 [ 0.8138546 ]
 [ 0.11957388]
 [-0.2204336 ]
 [ 0.2469558 ]
 [-0.00463181]
 [-0.02959383]
 [-0.8791873 ]
 [-0.8504208 ]]


The results are pretty good compared to the normal equation. But it would be nice to not have to compute the derivative by hand all the time, especially for more difficult functions, e.g. regularized cost functions. Next, I will use *autodiff* to automatically compute the gradient.

In [18]:
gd_graph2 = tf.Graph()
with gd_graph2.as_default():
    X = tf.constant(X_train_biased, dtype=tf.float32, name="X")
    y = tf.constant(y_train.reshape(-1,1), dtype=tf.float32, name="y")
    # Initialize theta variables with uniform random values
    theta = tf.Variable( tf.random_uniform([n, 1], -1.0, 1.0), name="theta" )
    # Compute the predictions and error
    y_pred = tf.matmul( X, theta, name="predictions" )
    error = y_pred - y
    # Call on TF's mse function
    mse = tf.reduce_mean( tf.square(error), name="mse" )
    # Using tf's autodiff capability
    dJdtheta = tf.gradients( mse, [theta], name="dJdtheta" )[0]
    print(dJdtheta)
    
    # Training/learning op. assign() computes a new value and assigns it to a TF variable
    train_op = tf.assign( theta, theta - alpha*dJdtheta )
    
    init_op = tf.global_variables_initializer()

Tensor("dJdtheta/predictions_grad/MatMul_1:0", shape=(9, 1), dtype=float32)


In [19]:
with tf.Session( graph=gd_graph2 ) as sess:
    sess.run(init_op)
    
    for i in range(n_epochs):
        
        if i % 100 == 0:
            print("Epoch ", i, "MSE = ", mse.eval())
        sess.run(train_op)
    
    # At the end, print the current thetas
    print(theta.eval())


Epoch  0 MSE =  7.447377
Epoch  100 MSE =  0.6730163
Epoch  200 MSE =  0.55053324
Epoch  300 MSE =  0.54264486
Epoch  400 MSE =  0.5386119
Epoch  500 MSE =  0.5355076
Epoch  600 MSE =  0.5330425
Epoch  700 MSE =  0.5310713
Epoch  800 MSE =  0.52948725
Epoch  900 MSE =  0.52820766
Epoch  1000 MSE =  0.5271705
Epoch  1100 MSE =  0.526326
Epoch  1200 MSE =  0.52563715
Epoch  1300 MSE =  0.5250738
Epoch  1400 MSE =  0.5246114
Epoch  1500 MSE =  0.5242318
Epoch  1600 MSE =  0.52391905
Epoch  1700 MSE =  0.5236611
Epoch  1800 MSE =  0.5234484
Epoch  1900 MSE =  0.52327186
[[ 2.0638661 ]
 [ 0.85329   ]
 [ 0.12174978]
 [-0.29710376]
 [ 0.31221607]
 [-0.00445192]
 [-0.02958686]
 [-0.8215701 ]
 [-0.7977139 ]]


It is possible to roll all of the above into a simple call to a tf `Optimizer`!

In [25]:
gdwithopt_graph = tf.Graph()
# All the same intialization code, but then call on a MomentumOptimizer (or whatever other flaver)
with gdwithopt_graph.as_default():
    X = tf.constant(X_train_biased, dtype=tf.float32, name="X")
    y = tf.constant(y_train.reshape(-1,1), dtype=tf.float32, name="y")
    # Initialize theta variables with uniform random values
    theta = tf.Variable( tf.random_uniform([n, 1], -1.0, 1.0), name="theta" )
    # Compute the predictions and error
    y_pred = tf.matmul( X, theta, name="predictions" )
    error = y_pred - y
    # Call on TF's mse function
    mse = tf.reduce_mean( tf.square(error), name="mse" )
    
    # The optimizer:
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=alpha)
    training_op = optimizer.minimize(mse)
    #optimizer = tf.train.MomentumOptimizer(learning_rate=alpha, momentum=0.9)
    
    init_op = tf.global_variables_initializer()

In [27]:
with tf.Session(graph=gdwithopt_graph) as sess:
    sess.run(init_op)
    
    for i in range(n_epochs):
        if i % 100 == 0:
            print("Epoch ", i, "MSE = ", mse.eval())
        sess.run(training_op)
    
    # At the end, print the current thetas
    print(theta.eval())


Epoch  0 MSE =  8.766612
Epoch  100 MSE =  0.68664193
Epoch  200 MSE =  0.5629296
Epoch  300 MSE =  0.55095947
Epoch  400 MSE =  0.5436051
Epoch  500 MSE =  0.5382778
Epoch  600 MSE =  0.5343784
Epoch  700 MSE =  0.5315074
Epoch  800 MSE =  0.5293785
Epoch  900 MSE =  0.527789
Epoch  1000 MSE =  0.5265926
Epoch  1100 MSE =  0.5256865
Epoch  1200 MSE =  0.5249942
Epoch  1300 MSE =  0.524461
Epoch  1400 MSE =  0.5240476
Epoch  1500 MSE =  0.52372515
Epoch  1600 MSE =  0.52347136
Epoch  1700 MSE =  0.52327
Epoch  1800 MSE =  0.5231109
Epoch  1900 MSE =  0.5229829
[[ 2.0638661 ]
 [ 0.8442006 ]
 [ 0.12141337]
 [-0.27915266]
 [ 0.29681358]
 [-0.00443377]
 [-0.02960958]
 [-0.8336215 ]
 [-0.8086179 ]]
