# Jonathan Halverson
# Wednesday, October 25, 2017
# Geron Chapter 12: Intro to Tensorflow

Let's import the module and then create a simple computation graph:

In [1]:
import tensorflow as tf

# Build the graph: two integer variables and an expression node that depends on them.
# Nothing is computed here; f is a symbolic tensor until a session evaluates it.
x = tf.Variable(3, name="x")
y = tf.Variable(9, name="y")
f = x * x + x * y + 4

The graph has been created. Note that no computation has taken place at this point. Next we create a TF session and execute the graph:

In [2]:
# Inside the `with` block this session is the default one, which is why
# .run() and .eval() can be called without passing a session explicitly.
with tf.Session() as sess:
     x.initializer.run()  # variables must be initialized before they are read
     y.initializer.run()
     result = f.eval()  # 3*3 + 3*9 + 4

In [3]:
# result was assigned inside the session block and is still available here.
print(result)

40


Note that x.initializer.run() is equivalent to tf.get_default_session().run(x.initializer) and f.eval() is equivalent to calling tf.get_default_session().run(f).

In [4]:
# Inspect the Python type of a variable node.
type(x)

tensorflow.python.ops.variables.Variable

In [5]:
# Inspect the Python type of an expression built from variables.
type(f)

tensorflow.python.framework.ops.Tensor

In [6]:
# Inspect the Python type of an evaluated tensor.
type(result)

numpy.int32

### Graph 2

In [7]:
# Start from an empty default graph so nodes from the previous section do not carry over.
tf.reset_default_graph()

Let's try another graph:

In [8]:
# A float-valued graph. Rather than running each variable's initializer separately,
# create one op that initializes all variables in the graph.
a = tf.Variable(-10.0, name='a')
b = tf.Variable(1e3, name='b')
g = a / b - 1.0
init = tf.global_variables_initializer()

In [9]:
# Initialize every variable with the single init op, then evaluate g.
with tf.Session() as sess:
     init.run()
     result = g.eval()  # -10 / 1000 - 1

In [10]:
# Show the value of g computed in the session above.
print(result)

-1.01


### Interactive sessions

In [11]:
# Start from an empty default graph so nodes from the previous section do not carry over.
tf.reset_default_graph()

In [12]:
# Small integer graph used to demonstrate an interactive session: h = v * u.
u = tf.Variable(4, name='u')
v = tf.Variable(7, name='v')
h = v * u

In [13]:
# An InteractiveSession installs itself as the default session when it is created,
# so .run()/.eval() work without a `with` block. It is not closed automatically;
# try/finally guarantees close() is called even if one of the steps raises.
sess = tf.InteractiveSession()
try:
    u.initializer.run()
    v.initializer.run()
    result = sess.run(h)  # 7 * 4
    print(result)
finally:
    sess.close()

28


One needs to explicitly close the session as is done above.

### Managing graphs

In [14]:
# Start from an empty default graph so nodes from the previous section do not carry over.
tf.reset_default_graph()

In [15]:
# A node created without any graph context should end up in the default graph;
# the identity check below confirms it.
x1 = tf.Variable(1)
x1.graph is tf.get_default_graph()

True

In [16]:
# Create a separate graph and make it the default only for the duration of the
# `with` block, so x2 is added to `graph`.
graph = tf.Graph()
with graph.as_default():
     x2 = tf.Variable(2)

In [17]:
# Is x2 attached to the explicitly created graph?
x2.graph is graph

True

In [18]:
# Is x2 attached to the default graph?
x2.graph is tf.get_default_graph()

False

Any node that you create is automatically added to the default graph. To assign nodes to a different graph, you have to explicitly create that graph and make it the default (inside a `with graph.as_default():` block).

In [19]:
# The same context-manager pattern applied to the graph that is already the default:
# x3 is created while the current default graph is (still) the default.
with tf.get_default_graph().as_default():
     x3 = tf.Variable(8, name='x3')

In [20]:
# Inspect the Python type of the default graph object.
type(tf.get_default_graph())

tensorflow.python.framework.ops.Graph

### Lifecycle of a node

In [21]:
# Start from an empty default graph so nodes from the previous section do not carry over.
tf.reset_default_graph()

In [22]:
# y and z both depend on x, which in turn depends on the constant w.
w = tf.constant(5)
x = w + 2
y = x + 5
z = x + 3

In [23]:
# Two separate eval() calls: y and z are evaluated one after the other,
# each with its own call into the session.
with tf.Session() as sess:
     print(y.eval())  # (5 + 2) + 5
     print(z.eval())  # (5 + 2) + 3

12
10


When z is evaluated the graph is traversed again, so w and x are computed a second time. This is inefficient; instead we can fetch y and z in a single graph run:

In [24]:
# Fetch y and z together with a single sess.run() call instead of two eval() calls.
with tf.Session() as sess:
     y_val, z_val = sess.run([y, z])
     print(y_val)
     print(z_val)

12
10


### Working with tensors

In [25]:
# Start from an empty default graph so nodes from the previous section do not carry over.
tf.reset_default_graph()

In [26]:
import numpy as np

# Variables initialized from NumPy arrays: x holds 10.0 ... 19.0 and y holds ten
# uniform random numbers in [0, 1). No random seed is set, so y (and therefore
# the printed z) differs from run to run.
x = tf.Variable(np.arange(10.0, 20.0, 1.0))
y = tf.Variable(np.random.rand(10), dtype=tf.float64)
z = x + y  # element-wise sum
init = tf.global_variables_initializer()

In [27]:
# Evaluate z in an interactive session. The session is not closed automatically;
# try/finally guarantees close() is called even if initialization or evaluation raises.
sess = tf.InteractiveSession()
try:
    init.run()
    print(z.eval())
finally:
    sess.close()

[ 10.619979    11.75443366  12.61481646  13.92904539  14.99944212
  15.83618576  16.90079236  17.21633076  18.5702605   19.68071358]


### Linear regression with tensorflow (the normal equation)

In [28]:
import pandas as pd

# Location of the housing CSV, given relative to the notebook's working directory.
f = '../machine_learning/geron_housing/housing.csv'

# Row 0 of the file supplies the column names.
data = pd.read_csv(f, header=0)
data.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


In [29]:
# Summarize column dtypes and non-null counts to see which columns need imputation.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 10 columns):
longitude             20640 non-null float64
latitude              20640 non-null float64
housing_median_age    20640 non-null float64
total_rooms           20640 non-null float64
total_bedrooms        20433 non-null float64
population            20640 non-null float64
households            20640 non-null float64
median_income         20640 non-null float64
median_house_value    20640 non-null float64
ocean_proximity       20640 non-null object
dtypes: float64(9), object(1)
memory usage: 1.6+ MB


In [30]:
# Keep an independent copy of the target column before it is removed from the frame.
housing_labels = data['median_house_value'].copy()

# Remove the target and the text-valued ocean_proximity column in place.
data.drop(labels=['ocean_proximity', 'median_house_value'], axis='columns', inplace=True)

### An aside on concatenate and append

In [31]:
# 3x3 integer matrix holding 1..9 in row-major order, used by the examples that follow.
z = np.arange(1, 10).reshape(3, 3)
z

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [32]:
# np.c_ with an explicit 3x1 nested list: appended as a new last column.
np.c_[z, [[-9], [-9], [-9]]]

array([[ 1,  2,  3, -9],
       [ 4,  5,  6, -9],
       [ 7,  8,  9, -9]])

In [33]:
# np.c_ with a flat list of length 3: also appended as a new last column.
np.c_[z, [-9, -9, -9]]

array([[ 1,  2,  3, -9],
       [ 4,  5,  6, -9],
       [ 7,  8,  9, -9]])

In [34]:
# The same column append written with np.concatenate along axis 1.
np.concatenate([z, [[-9], [-9], [-9]]], axis=1)

array([[ 1,  2,  3, -9],
       [ 4,  5,  6, -9],
       [ 7,  8,  9, -9]])

In [35]:
# np.append along axis 0 adds a new row; it returns a new array and leaves z unchanged.
np.append(z, [[-8, -8, -8]], axis=0)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [-8, -8, -8]])

In [36]:
# sklearn.preprocessing.Imputer was removed in scikit-learn 0.22; its replacement,
# sklearn.impute.SimpleImputer, exists since 0.20. Prefer the new class and fall back
# to the old one so the cell runs on either side of the change.
try:
    from sklearn.impute import SimpleImputer as Imputer
except ImportError:  # older scikit-learn without sklearn.impute
    from sklearn.preprocessing import Imputer
from sklearn.preprocessing import StandardScaler

# impute missing data: each missing entry is replaced by the median of its column
imp = Imputer(strategy='median')
housing = imp.fit_transform(data)

# standardize the features
std_sc = StandardScaler()
housing = std_sc.fit_transform(housing)

# add bias term: a column of ones prepended as column 0
housing = np.c_[np.ones((housing.shape[0], 1)), housing]

In [37]:
# m = number of rows (training instances), n = number of columns including the bias column.
m, n = housing.shape
m, n

(20640, 9)

Begin to create the graph:

In [38]:
# Normal equation expressed as TF ops: theta = (X^T X)^-1 X^T y.
# The labels are reshaped into a single column so the matrix products line up.
X = tf.constant(housing, name="X", dtype=tf.float32)
y = tf.constant(housing_labels.values.reshape(-1, 1), name="y", dtype=tf.float32)
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

In [39]:
# The graph contains only constants, so no variable initialization is needed before eval().
with tf.Session() as sess:
     theta_value = theta.eval()

In [40]:
# The first entry is the coefficient of the bias column that was prepended to the features.
print(theta_value)

[[ 206856.1875    ]
 [ -85369.03125   ]
 [ -90723.1796875 ]
 [  14403.26171875]
 [ -14444.0703125 ]
 [  34037.9453125 ]
 [ -45153.77734375]
 [  30319.48828125]
 [  75520.3125    ]]


In [41]:
from sklearn.linear_model import LinearRegression

# Cross-check against scikit-learn. The bias column (column 0) is excluded from the
# features; the fit is done with LinearRegression's default settings.
lin_reg = LinearRegression().fit(housing[:, 1:], housing_labels)
# %-formatting prints the same text on Python 2 and Python 3; the bare
# `print a, b` statement is a SyntaxError on Python 3.
print('%s %s' % (lin_reg.intercept_, lin_reg.coef_))

206855.816909 [-85369.22518    -90723.40175504  14403.20315262 -14443.94445799
  34037.42560482 -45153.79498679  30319.8204304   75520.30834439]


### Linear regression with tensorflow (the gradient descent)

In [48]:
# Start from an empty default graph so nodes from the previous section do not carry over.
tf.reset_default_graph()

In [49]:
# Hyperparameters for the gradient-descent experiments.
learning_rate = 0.01  # step size applied to the gradient
epochs = 5000  # number of training iterations

In [50]:
# Batch gradient descent with a hand-derived gradient.
X = tf.constant(housing, name="X", dtype=tf.float32)
y = tf.constant(housing_labels.values.reshape(-1, 1), name="y", dtype=tf.float32)
# Parameters start at uniform random values in [-1, 1); no seed is set.
theta = tf.Variable(tf.random_uniform([n, 1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')
# Analytic gradient of the MSE with respect to theta: (2/m) * X^T (X theta - y).
gradients = (2.0 / m) * tf.matmul(tf.transpose(X), error)
# One update step: theta <- theta - learning_rate * gradient.
training_op = tf.assign(theta, theta - learning_rate * gradients)
init = tf.global_variables_initializer()

The graph is now complete. Now we start a session to execute the computation graph:

In [52]:
# Run `epochs` gradient-descent steps, logging the MSE every 500 epochs.
# The progress line is built with %-formatting so it prints as plain text on both
# Python 2 and Python 3; a parenthesised, comma-separated print is displayed as a
# tuple on Python 2.
# NOTE(review): the output saved with this cell logs every 100 epochs and stops at
# 2500, so it predates this code -- re-run the cell to refresh it.
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(epochs):
        if epoch % 500 == 0:
            print('Epoch %d MSE= %s' % (epoch, mse.eval()))
        sess.run(training_op)
    # read the trained parameters while the session is still open
    best_theta = theta.eval()

('Epoch', 0, 'MSE=', 5.6104505e+10)
('Epoch', 100, 'MSE=', 6.8155387e+09)
('Epoch', 200, 'MSE=', 5.6269732e+09)
('Epoch', 300, 'MSE=', 5.399807e+09)
('Epoch', 400, 'MSE=', 5.2504458e+09)
('Epoch', 500, 'MSE=', 5.143806e+09)
('Epoch', 600, 'MSE=', 5.0669548e+09)
('Epoch', 700, 'MSE=', 5.0112026e+09)
('Epoch', 800, 'MSE=', 4.9705477e+09)
('Epoch', 900, 'MSE=', 4.9407503e+09)
('Epoch', 1000, 'MSE=', 4.9188019e+09)
('Epoch', 1100, 'MSE=', 4.9025572e+09)
('Epoch', 1200, 'MSE=', 4.8904909e+09)
('Epoch', 1300, 'MSE=', 4.8814746e+09)
('Epoch', 1400, 'MSE=', 4.8747167e+09)
('Epoch', 1500, 'MSE=', 4.8696172e+09)
('Epoch', 1600, 'MSE=', 4.8657556e+09)
('Epoch', 1700, 'MSE=', 4.8628209e+09)
('Epoch', 1800, 'MSE=', 4.8605691e+09)
('Epoch', 1900, 'MSE=', 4.8588431e+09)
('Epoch', 2000, 'MSE=', 4.8575007e+09)
('Epoch', 2100, 'MSE=', 4.8564608e+09)
('Epoch', 2200, 'MSE=', 4.8556508e+09)
('Epoch', 2300, 'MSE=', 4.8550047e+09)
('Epoch', 2400, 'MSE=', 4.8545024e+09)
('Epoch', 2500, 'MSE=', 4.8540959e+09)


In [53]:
# Parameters found by gradient descent, for comparison with the normal-equation result.
print(best_theta)

[[ 206855.4375    ]
 [ -85015.7421875 ]
 [ -90382.6328125 ]
 [  14422.953125  ]
 [ -14494.73925781]
 [  33150.40234375]
 [ -45356.4296875 ]
 [  31449.90625   ]
 [  75547.515625  ]]


### Using Autodiff

Tensorflow offers several ways to compute gradients using autodiff:

In [54]:
# Start from an empty default graph so nodes from the previous section do not carry over.
tf.reset_default_graph()

In [55]:
# Same model as the hand-derived version, but the gradient of the MSE with respect
# to theta is produced by tf.gradients instead of being written out.
X = tf.constant(housing, name="X", dtype=tf.float32)
y = tf.constant(housing_labels.values.reshape(-1, 1), name="y", dtype=tf.float32)
theta = tf.Variable(tf.random_uniform([n, 1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')
# tf.gradients returns one gradient per listed variable; [0] selects the one for theta.
gradients = tf.gradients(mse, [theta])[0] # reverse-mode autodiff
training_op = tf.assign(theta, theta - learning_rate * gradients)
init = tf.global_variables_initializer()

In [56]:
# Run `epochs` gradient-descent steps, logging the MSE every 500 epochs.
# The progress line is built with %-formatting so it prints as plain text on both
# Python 2 and Python 3; a parenthesised, comma-separated print is displayed as a
# tuple on Python 2.
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(epochs):
        if epoch % 500 == 0:
            print('Epoch %d MSE= %s' % (epoch, mse.eval()))
        sess.run(training_op)
    # read the trained parameters while the session is still open
    best_theta = theta.eval()

('Epoch', 0, 'MSE=', 5.610487e+10)
('Epoch', 500, 'MSE=', 5.1438024e+09)
('Epoch', 1000, 'MSE=', 4.9188019e+09)
('Epoch', 1500, 'MSE=', 4.8696166e+09)
('Epoch', 2000, 'MSE=', 4.8575012e+09)
('Epoch', 2500, 'MSE=', 4.8540954e+09)
('Epoch', 3000, 'MSE=', 4.8529992e+09)
('Epoch', 3500, 'MSE=', 4.8525962e+09)
('Epoch', 4000, 'MSE=', 4.8524242e+09)
('Epoch', 4500, 'MSE=', 4.852353e+09)


### Using an optimizer

In [57]:
# Start from an empty default graph so nodes from the previous section do not carry over.
tf.reset_default_graph()

In [58]:
# Same model again; here a built-in optimizer creates the update op, so neither the
# gradient nor the assignment to theta is written by hand.
X = tf.constant(housing, name="X", dtype=tf.float32)
y = tf.constant(housing_labels.values.reshape(-1, 1), name="y", dtype=tf.float32)
theta = tf.Variable(tf.random_uniform([n, 1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate) # define optimizer
training_op = optimizer.minimize(mse) # tell it what to minimize
init = tf.global_variables_initializer()

In [59]:
# Run `epochs` optimizer steps, logging the MSE every 500 epochs.
# The progress line is built with %-formatting so it prints as plain text on both
# Python 2 and Python 3; a parenthesised, comma-separated print is displayed as a
# tuple on Python 2.
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(epochs):
        if epoch % 500 == 0:
            print('Epoch %d MSE= %s' % (epoch, mse.eval()))
        sess.run(training_op)
    # read the trained parameters while the session is still open
    best_theta = theta.eval()

('Epoch', 0, 'MSE=', 5.6104636e+10)
('Epoch', 500, 'MSE=', 5.1438008e+09)
('Epoch', 1000, 'MSE=', 4.9188014e+09)
('Epoch', 1500, 'MSE=', 4.8696161e+09)
('Epoch', 2000, 'MSE=', 4.8575017e+09)
('Epoch', 2500, 'MSE=', 4.8540948e+09)
('Epoch', 3000, 'MSE=', 4.8529997e+09)
('Epoch', 3500, 'MSE=', 4.8525962e+09)
('Epoch', 4000, 'MSE=', 4.8524242e+09)
('Epoch', 4500, 'MSE=', 4.852353e+09)


### Implementing Newton-Raphson in TF

In [68]:
# Start from an empty default graph so nodes from the previous section do not carry over.
tf.reset_default_graph()

In [69]:
# Newton-Raphson for g(x) = x^2 - 3, starting from x = 2.0.
# Each run of training_op applies x <- x - g(x) / g'(x), with g'(x) = 2x.
x = tf.Variable(2.0, name="x")
training_op = tf.assign(x, x - (x * x - 3.0) / (2.0 * x))

In [72]:
# Apply ten Newton-Raphson updates, then read the estimate of sqrt(3).
with tf.Session() as sess:
    x.initializer.run()
    for i in range(10):
        sess.run(training_op)
    root = x.eval()
# Print the estimate and its ratio to the reference value 3**0.5 (1.0 means exact).
# %-formatting prints the same text on Python 2 and Python 3; the bare
# `print a, b` statement is a SyntaxError on Python 3.
print('%s %s' % (root, root / 3**0.5))

1.73205 0.999999982052


### Working with placeholders

In [60]:
# Start from an empty default graph so nodes from the previous section do not carry over.
tf.reset_default_graph()

In [61]:
# A placeholder has no value of its own; it is supplied at run time through feed_dict.
# shape=(None, 3) fixes three columns and leaves the number of rows open.
A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5
with tf.Session() as sess:
     B_val_1 = B.eval(feed_dict={A:[[1, 2, 3]]})  # one row
     B_val_2 = B.eval(feed_dict={A:[[1, 2, 3], [4, 5, 6]]})  # two rows
print(B_val_1)
print(B_val_2)

[[ 6.  7.  8.]]
[[  6.   7.   8.]
 [  9.  10.  11.]]


An exception will be thrown if you don't specify a value for a placeholder at run time. With a placeholder node you can implement mini-batch optimization.