# Deep Learning & Neural Networks
## Project 1 - Nonlinear Regression

### SETUP

In [None]:
# Load TensorFlow
import tensorflow as tf
# Load numpy - adds MATLAB/Julia-style math to Python
import numpy as np
# Load matplotlib for plotting
%matplotlib inline
import matplotlib.pyplot as plt
# We'll be doing 3D plotting, so we need Axes3D too
from mpl_toolkits.mplot3d import Axes3D

In [None]:
# We'll use the classic Rosenbrock function
def rosenbrock(x, y, a=1.0, b=100.0):
    """Evaluate the Rosenbrock function at (x, y).

    Computes ``(a - x)**2 + b * (y - x**2)**2``. Only arithmetic
    operators are used, so plain numbers and NumPy arrays (evaluated
    elementwise) both work.

    Args:
        x: First coordinate (scalar or array).
        y: Second coordinate (scalar or array).
        a: Offset in the first term; defaults to the classic value 1.0.
        b: Weight of the second term; defaults to the classic value 100.0.

    Returns:
        The function value; it is zero at ``(a, a**2)``.
    """
    return (a - x) ** 2 + b * (y - x**2) ** 2
# Draw the function as a surface over a 100 x 100 grid.
# x varies along the rows (-2..+2), y along the columns (-1..+4);
# broadcasting a column vector against a row vector fills the grid.
x = np.linspace(-2, +2, 100)[:, np.newaxis] * np.ones(100)
y = np.ones(100)[:, np.newaxis] * np.linspace(-1, +4, 100)
z = rosenbrock(x, y)
fig = plt.figure()
ax = plt.axes(projection='3d')
ax.plot_surface(x, y, z, cmap=plt.cm.jet)

In [None]:
# Build a random training set of N_TRAIN points
N_TRAIN = 500
# np.random.rand draws from [0, 1); scale and shift it so that
# x covers -2 to +2 ...
x_train = 4 * np.random.rand(N_TRAIN) - 2
# ... and y covers -1 to 4
y_train = 5 * np.random.rand(N_TRAIN) - 1
# The target value is the function evaluated at each sampled point
z_train = rosenbrock(x_train, y_train)
# Show the samples on top of a faint copy of the true surface
fig = plt.figure()
ax = plt.axes(projection='3d')
ax.plot_surface(x, y, z, alpha=0.2, cmap=plt.cm.jet)
ax.scatter(x_train, y_train, z_train, s=10, c="white")

### LIVE CODING BEGINS

In [None]:
# OK, let's start with TensorFlow
# TF is kind of like JuMP
# - Instead of a model, we have a session and a graph
# - For "unknowns", we have variables in both
# - We need to describe relationships between the variables
#   and what we're optimizing for too
# See slides for architecture
# - We'll have N hidden neurons
# - Each has three inputs: x, y, and a bias term -> 3N unknowns
# - Then the output neuron has N inputs and a bias -> N+1 unknowns

N_HIDDEN = 20

# Pretty much everything we will be doing is the manipulation of
# tensors (or matrices), hence the name.
# The first tensor we define is the input to the graph.
# When calculating the error of our network, it'd be nice to
# calculate the error for multiple training points, or all of
# them, simultaneously. The number of points we train on at a
# time is the batch size.
# So we'll define a placeholder tensor with an unspecified
# number of rows and 2 columns (one for x, one for y).
net_input = tf.placeholder(tf.float32, [None,2])

# We now need to create a tensor for the hidden weights.
# We have 2 inputs to each unit, and we have N_HIDDEN units,
# so we need a 2 x N_HIDDEN weight tensor to calculate the
# N_HIDDEN inputs, plus an N_HIDDEN vector of biases.

# We can initialize the weights and biases to random [-1, +1]
hidden_W = tf.Variable(tf.random_uniform([2,N_HIDDEN], -1.0, +1.0))
hidden_b = tf.Variable(tf.random_uniform([  N_HIDDEN], -1.0, +1.0))

# Now we can create an expression for the combination of the
# inputs and the biases. This doesn't actually calculate
# anything - it's just describing what should be calculated!
# Recall that net_input is ? x 2, and hidden_W is 2 x N_HIDDEN,
# so the multiplication will be ? x N_HIDDEN.
# The bias term is a vector though, so how do we add them?
# TF provides tf.nn.bias_add, which adds the bias to each row.
hidden_in = tf.nn.bias_add(tf.matmul(net_input, hidden_W), hidden_b)

# Now we can put these inputs through our activation function.
# This is calculated elementwise.
hidden_out = tf.nn.sigmoid(hidden_in)
# tf.nn has other options, check out the documentation!

# We now have the output layer. It needs one weight for every
# hidden neuron, and a single bias term.
output_W = tf.Variable(tf.random_uniform([N_HIDDEN,1], -1.0, +1.0))
output_b = tf.Variable(tf.random_uniform([1], -1.0, +1.0))
output_in = tf.nn.bias_add(tf.matmul(hidden_out, output_W), output_b)
net_output = output_in  # linear! (no activation on the output unit)

# We now need to define our loss function.
# We'll use good old squared error!
# First, create a placeholder for the "true" value
# (one column, same unspecified number of rows as the input).
exp_output = tf.placeholder(tf.float32, [None,1])
sq_error = tf.square(net_output - exp_output)
# Average the squared errors over the whole batch
mse = tf.reduce_mean(sq_error)

# We'll use gradient descent to train our network.
# TensorFlow has many algorithms, and will handle
# calculating the derivatives itself!
optimizer = tf.train.GradientDescentOptimizer(0.0005) # learning rate; presumably tuned for the sigmoid hidden layer
train_step = optimizer.minimize(mse)

# Now, create an operation to initialize all the variables.
# NOTE(review): later TensorFlow 1.x releases deprecate this call in
# favor of tf.global_variables_initializer() - confirm against the
# installed version.
init = tf.initialize_all_variables()

# Create a Session - a Session holds the state for all runs of a graph
sess = tf.Session()

# Run the initialization operation
sess.run(init)

### LIVE CODING ENDS

In [None]:
# Evaluate the still-untrained network on the training set.
# The graph is fed 2-D arrays: one (x, y) row per sample for the
# input, and a single column for the expected output.
train_xy_tensor = np.column_stack((x_train, y_train))
train_z_tensor = z_train.reshape((N_TRAIN, 1))
initial_z = sess.run(
    net_output,
    feed_dict={net_input: train_xy_tensor, exp_output: train_z_tensor})
# Flatten the single-column result so it can be scattered against x and y
initial_z = initial_z.ravel()
# True values in white, network predictions in purple
fig = plt.figure()
ax = plt.axes(projection='3d')
ax.scatter(x_train, y_train, z_train, s=10, c="white")
ax.scatter(x_train, y_train, initial_z, s=10, c="purple")

In [None]:
# Run some gradient steps. Every step feeds the whole training set
# as a single batch and records the mean squared error fetched in
# the same run, so the learning curve can be plotted afterwards.
N_STEPS = 10000
train_errors = []
for i in range(N_STEPS):
    train_error, _ = sess.run((mse, train_step),
                              feed_dict={
                                  net_input: train_xy_tensor,
                                  exp_output: train_z_tensor})
    # Report progress every 500 steps. A formatted print() call gives
    # the same "step error" output under Python 2 and Python 3, unlike
    # the `print a, b` statement, which is a syntax error on Python 3.
    if i % 500 == 0:
        print("%d %s" % (i, train_error))
    train_errors.append(train_error)
plt.plot(range(N_STEPS), train_errors, 'b-')

In [None]:
# Evaluate the trained network on the training set: fetch the final
# mean squared error together with the predictions.
train_error, trained_z = sess.run((mse, net_output), feed_dict={
                        net_input: train_xy_tensor,
                        exp_output: train_z_tensor})
# Flatten the single-column predictions for plotting
trained_z = np.reshape(trained_z, len(trained_z))
# True values in white, network predictions in purple
fig = plt.figure()
ax = plt.axes(projection='3d')
ax.scatter(x_train, y_train, z_train, c="white", s=10)
ax.scatter(x_train, y_train, trained_z, c="purple", s=10)
# Print a few true/predicted values side by side, then the error.
# print(...) with a single argument behaves identically under
# Python 2 and Python 3.
# NOTE(review): the slice starts at index 1, so this shows 9 samples
# and skips the first one - confirm whether [:10] was intended.
print(z_train[1:10])
print(trained_z[1:10])
print(train_error)

In [None]:
# With the function approximator trained, evaluate it on a regular
# 100 x 100 grid to see the surface it has learned.
x_test_mat = np.linspace(-2, +2, 100)[:, np.newaxis] * np.ones(100)
y_test_mat = np.ones(100)[:, np.newaxis] * np.linspace(-1, +4, 100)
# Flatten each grid into a column and pair them up as (x, y) rows
x_test = x_test_mat.reshape((100 * 100, 1))
y_test = y_test_mat.reshape((100 * 100, 1))
test_xy_tensor = np.hstack((x_test, y_test))
# Only the input is fed here - no expected output is needed to predict
net_z_test = sess.run(net_output, feed_dict={net_input: test_xy_tensor})
# Fold the predictions back into the grid layout for plotting
z_test_mat = net_z_test.reshape((100, 100))
fig = plt.figure()
ax = plt.axes(projection='3d')
ax.plot_surface(x_test_mat, y_test_mat, z_test_mat, cmap=plt.cm.jet)

In [None]:
# Plot the true function on the same 100 x 100 grid used for the
# network's surface, for visual comparison.
x = np.outer(np.linspace(-2,+2,100), np.ones(100))
y = np.outer(np.ones(100), np.linspace(-1,+4,100))
z = rosenbrock(x, y)
fig = plt.figure()
ax = plt.axes(projection='3d')
ax.plot_surface(x, y, z, cmap=plt.cm.jet)
# Mean squared error between the true surface and the network's
# surface over the grid. print(...) with a single argument behaves
# identically under Python 2 and Python 3.
print(np.mean((z - z_test_mat)**2))