# Basic Concepts and API

All TF code follows this process:
1. Create a **computation graph** that defines your computational structure
2. Create a TF session
3. Run the computation graph in the session

In [1]:
import tensorflow as tf

In [2]:
# Define variables and operations in the graph

x = tf.Variable(3, name="x") # declare a symbolic name, x
y = tf.Variable(4, name="y")
g = x*x*y
h = y**3
print(type(g))
print(type(h))
f = g + h
print(type(f))

<class 'tensorflow.python.framework.ops.Tensor'>
<class 'tensorflow.python.framework.ops.Tensor'>
<class 'tensorflow.python.framework.ops.Tensor'>


The *type* of each computation is a TF **op**.

In [3]:
# Build a session and run. Using the "with" context block automatically closes the session.
with tf.Session() as tf_sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()

In [4]:
result

100

An alternative to initializing variables individually is to call the <code>global_variables_initializer</code> function.

In [5]:
init = tf.global_variables_initializer() # Creates an init node

with tf.Session() as tf_sess:
    init.run()
    result = f.eval()
    print(result)

100


## Graphs

We can build graphs and then merge them together programmatically. Otherwise, it is assumed that declared computations are applied to the **same graph**.

In [6]:
x1 = tf.Variable(1)
# check where this x1 node lives:
x1.graph is tf.get_default_graph()

True

In [7]:
# Now, make another graph and add a new variable to it:
new_graph = tf.Graph()
with new_graph.as_default():
    x2 = tf.Variable(2)
    
print(x2.graph is tf.get_default_graph())
print(x2.graph is new_graph)

False
True


## More on Nodes

TF node evaluation determines the set of nodes that the node depends on and evaluates them. **All node values (except variables) are dropped between graph runs!**

Varialbes start their life when initialized and end when the session closes.

In [8]:
tf.reset_default_graph()

w = tf.constant(9)
x = w * 7
y = x + 2
z = x**2

with tf.Session() as tf_sess:
    print(y.eval())
    print(z.eval())

65
3969


The above code is not efficient, as the computation of x and w will happen twice! Instead, have y and evaluate in a single graph run.

In [9]:
with tf.Session() as tf_sess:
    y_val, z_val = tf_sess.run([y,z])
    print(y_val)
    print(z_val)

65
3969


## Operations

TF "ops" can take *any* number of inputs and produce *any* number of outputs. Sources are constants and Variables. The inputs and outputs of operations are always **tensors** - multi-dimensional arrays. In TF, tensors are numpy <code>ndarray</code>s.

The following example performs linear regression using the closed form Normal Equation embedded as a TF op.

In [21]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

housing_dataset = fetch_california_housing()
m,n = housing_dataset.data.shape
print("Data shape: " + str(m) + " instances, " + str(n) + " features")

X_raw = housing_dataset.data
y_raw = housing_dataset.target

# Split up the data
X_train, X_test, y_train, y_test = train_test_split(X_raw, y_raw, test_size=0.2)
print("Training size: " + str(X_train.shape[0]) + "; Test size: " + str(X_test.shape[0]))

# Scale the data sets
housing_scaler = StandardScaler()
X_train_scaled = housing_scaler.fit_transform(X_train)
X_test_scaled = housing_scaler.transform(X_test)

Data shape: 20640 instances, 8 features
Training size: 16512; Test size: 4128


In [24]:
# Add a bias of 1 to model the linear regression.
X_train_biased = np.c_[np.ones((X_train_scaled.shape[0],1)), X_train_scaled]
X_test_biased = np.c_[np.ones((X_test_scaled.shape[0],1)), X_test_scaled]
print("Biased train data shape: " + str(X_train_biased.shape[0]) + " instances, " + str(X_train_biased.shape[1]) + " features")
print("Biased test data shape: " + str(X_test_biased.shape[0]) + " instances, " + str(X_test_biased.shape[1]) + " features")

Biased train data shape: 16512 instances, 9 features
Biased test data shape: 4128 instances, 9 features


In [27]:
X = tf.constant(X_train_biased, dtype=tf.float32, name="X")
print("Target array shape: " + str(X_train_biased.shape))
# Explicitly turn into an m x 1 vector
y = tf.constant(y_train.reshape(-1,1), dtype=tf.float32, name="y")
print("...as TF constant: " + str(y_train.shape))
XT = tf.transpose(X)

Target array shape: (16512, 9)
...as TF constant: (16512,)


Implement the Normal Equation:
$\theta^{\star} = (X\cdot X^T)^{-1}\cdot{X^T}\cdot{y}$

In [28]:
inv = tf.matrix_inverse( tf.matmul(XT, X) )
theta = tf.matmul( tf.matmul(inv, XT), y )

In [29]:
# Vroom vroom!
with tf.Session() as tf_sess:
    theta_val = theta.eval()

In [30]:
print("Performed a linear regression over the data set:")
print(str(theta_val) + "\n " + str(theta_val.shape))

Performed a linear regression over the data set:
[[ 2.070537  ]
 [ 0.82528734]
 [ 0.12030526]
 [-0.2670337 ]
 [ 0.30959433]
 [-0.00355308]
 [-0.04031251]
 [-0.8987142 ]
 [-0.8748418 ]]
 (9, 1)


## Manual Gradient Descent via TF

I will re-use the scaled data from above and implement gradient descent manually rather than use the normal equation solution.

In [50]:
# For grins, make a new graph for this implementation.
gd_graph = tf.Graph()
m = X_train_biased.shape[0]
n = X_train_biased.shape[1]

n_epochs = 2000
alpha = 0.01 # learning rate

with gd_graph.as_default():
    X = tf.constant(X_train_biased, dtype=tf.float32, name="X")
    y = tf.constant(y_train.reshape(-1,1), dtype=tf.float32, name="y")
    # Initialize theta variables with uniform random values
    theta = tf.Variable( tf.random_uniform([n, 1], -1.0, 1.0), name="theta" )
    # Compute the predictions and error
    y_pred = tf.matmul( X, theta, name="predictions" )
    error = y_pred - y
    # Call on TF's mse function
    mse = tf.reduce_mean( tf.square(error), name="mse" )
    # Gradient calculations
    dJdtheta = (2.0/m) * tf.matmul( tf.transpose(X), error )
    # Training/learning op. assign() computes a new value and assigns it to a TF variable
    train_op = tf.assign( theta, theta - alpha*dJdtheta )
    
    init_op = tf.global_variables_initializer()

In [49]:
with tf.Session( graph=gd_graph ) as sess:
    sess.run(init_op)
    
    for i in range(n_epochs):
        
        if i % 100 == 0:
            print("Epoch ", i, "MSE = ", mse.eval())
        sess.run(train_op)
    
    # At the end, print the current thetas
    print(theta.eval())


Epoch  0 MSE =  6.9594193
Epoch  100 MSE =  0.7892895
Epoch  200 MSE =  0.64937395
Epoch  300 MSE =  0.61405504
Epoch  400 MSE =  0.5899395
Epoch  500 MSE =  0.57241386
Epoch  600 MSE =  0.5596343
Epoch  700 MSE =  0.55031085
Epoch  800 MSE =  0.5435099
Epoch  900 MSE =  0.5385472
Epoch  1000 MSE =  0.5349258
Epoch  1100 MSE =  0.5322833
Epoch  1200 MSE =  0.53035384
Epoch  1300 MSE =  0.528945
Epoch  1400 MSE =  0.52791625
Epoch  1500 MSE =  0.52716416
Epoch  1600 MSE =  0.52661526
Epoch  1700 MSE =  0.52621436
Epoch  1800 MSE =  0.5259208
Epoch  1900 MSE =  0.52570605
[[ 2.0705311e+00]
 [ 8.2034910e-01]
 [ 1.2728150e-01]
 [-2.4187374e-01]
 [ 2.8220633e-01]
 [-1.0817696e-03]
 [-4.0797561e-02]
 [-8.5305583e-01]
 [-8.2786006e-01]]


The results are pretty good compared to the normal equation. But it would be nice to not have to compute the derivative by hand all the time, especially for more difficult functions, e.g. regularized cost functions. Next, I will use *autodiff* to automatically compute the gradient.

In [58]:
gd_graph2 = tf.Graph()
with gd_graph2.as_default():
    X = tf.constant(X_train_biased, dtype=tf.float32, name="X")
    y = tf.constant(y_train.reshape(-1,1), dtype=tf.float32, name="y")
    # Initialize theta variables with uniform random values
    theta = tf.Variable( tf.random_uniform([n, 1], -1.0, 1.0), name="theta" )
    # Compute the predictions and error
    y_pred = tf.matmul( X, theta, name="predictions" )
    error = y_pred - y
    # Call on TF's mse function
    mse = tf.reduce_mean( tf.square(error), name="mse" )
    # Using tf's autodiff capability
    dJdtheta = tf.gradients( mse, [theta], name="dJdtheta" )[0]
    print(dJdtheta)
    
    # Training/learning op. assign() computes a new value and assigns it to a TF variable
    train_op = tf.assign( theta, theta - alpha*dJdtheta )
    
    init_op = tf.global_variables_initializer()

Tensor("dJdtheta/predictions_grad/MatMul_1:0", shape=(9, 1), dtype=float32)


In [59]:
with tf.Session( graph=gd_graph2 ) as sess:
    sess.run(init_op)
    
    for i in range(n_epochs):
        
        if i % 100 == 0:
            print("Epoch ", i, "MSE = ", mse.eval())
        sess.run(train_op)
    
    # At the end, print the current thetas
    print(theta.eval())


Epoch  0 MSE =  8.066374
Epoch  100 MSE =  0.81876034
Epoch  200 MSE =  0.6727848
Epoch  300 MSE =  0.63128847
Epoch  400 MSE =  0.6028355
Epoch  500 MSE =  0.5821138
Epoch  600 MSE =  0.5669552
Epoch  700 MSE =  0.5558597
Epoch  800 MSE =  0.54773116
Epoch  900 MSE =  0.5417722
Epoch  1000 MSE =  0.5374011
Epoch  1100 MSE =  0.53419083
Epoch  1200 MSE =  0.53183097
Epoch  1300 MSE =  0.5300949
Epoch  1400 MSE =  0.5288158
Epoch  1500 MSE =  0.52787185
Epoch  1600 MSE =  0.52717376
Epoch  1700 MSE =  0.5266568
Epoch  1800 MSE =  0.52627397
Epoch  1900 MSE =  0.5259885
[[ 2.0705311e+00]
 [ 8.2960349e-01]
 [ 1.2945844e-01]
 [-2.5903448e-01]
 [ 2.9624826e-01]
 [-4.5156447e-04]
 [-4.1233450e-02]
 [-8.2855487e-01]
 [-8.0437732e-01]]
