In [1]:
import tensorflow as tf

In [2]:
x = tf.Variable(3, name="x")
y = tf.Variable(4, name="y")
f = x*x*y + y + 2

Instructions for updating:
Colocations handled automatically by placer.


In [7]:
sess = tf.Session()
sess.run(x.initializer)
sess.run(y.initializer)
result = sess.run(f)
print(result)

42


In [8]:
sess.close()

## A more efficient way to having to repeat sess.run() is to do the following

In [9]:
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()

In [10]:
init = tf.global_variables_initializer() #prepare an init node

In [11]:
with tf.Session() as sess:
    init.run() # initialize all variables
    result = f.eval()

In [12]:
sess = tf.InteractiveSession()
init.run()
result = f.eval()
print(result)

42


In [13]:
x1 = tf.Variable(1)
x1.graph is tf.get_default_graph()

True

#### Any time a node is created it gets added to the graph. In order to combat and manage this it is possible to create a new graph and temporarily make it the default graph within a block

In [14]:
graph = tf.Graph()
with graph.as_default():
    x2 = tf.Variable(2)
    
x2.graph is graph

True

In [15]:
x2.graph is tf.get_default_graph()

False

### While troubleshooting or coding our graphs, we may run into a case with many overlapping nodes and issues. 

#### In order to correct this we may take advantage of tf.reset_default_graph()

In [16]:
tf.reset_default_graph()

### In the following code below, x and w will both be evaluated twice for y, and z.

In [17]:
w = tf.constant(3)
x = w + 1
y = x + 6
z = x * 4

with tf.Session() as sess:
    print(y.eval()) 
    print(z.eval())

10
16


## Say if we only want to evaluate x and w once, we may do so in just one graph. 

In [18]:
with tf.Session() as sess:
    y_val, z_val = sess.run([y, z])
    print(y.eval()) 
    print(z.eval())

10
16


### Utilizing tensorflow for more than a scalar vector....
#### Let's do some computations

In [19]:
import numpy as np
from sklearn.datasets import fetch_california_housing

In [20]:
housing = fetch_california_housing()
m, n = housing.data.shape
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

Downloading Cal. housing from https://ndownloader.figshare.com/files/5976036 to /Users/davescott/scikit_learn_data


In [22]:
X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

with tf.Session() as sess:
    theta_value = theta.eval()
    print(theta_value)

[[-3.6959320e+01]
 [ 4.3698898e-01]
 [ 9.4245886e-03]
 [-1.0791138e-01]
 [ 6.4842808e-01]
 [-3.9986235e-06]
 [-3.7866351e-03]
 [-4.2142656e-01]
 [-4.3467718e-01]]


## Above the normal equation was utilized in order to calculate theta.

## Instead, let's implement gradient descent to calculate theta for us.

### Recall for gradient descent that data normalization needs to be undertaken before we work with the data. Otherwise training will take much more time.

In [25]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(housing_data_plus_bias)
scaled_housing_data_plus_bias = scaler.transform(housing_data_plus_bias)

In [29]:
n_epochs = 5000
learning_rate = 0.01

In [32]:
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()

Epoch 0 MSE = 7.3624015
Epoch 100 MSE = 4.9162626
Epoch 200 MSE = 4.855857
Epoch 300 MSE = 4.8405457
Epoch 400 MSE = 4.8305283
Epoch 500 MSE = 4.8232937
Epoch 600 MSE = 4.818017
Epoch 700 MSE = 4.814162
Epoch 800 MSE = 4.8113365
Epoch 900 MSE = 4.8092623
Epoch 1000 MSE = 4.8077374
Epoch 1100 MSE = 4.806612
Epoch 1200 MSE = 4.805779
Epoch 1300 MSE = 4.805162
Epoch 1400 MSE = 4.8047013
Epoch 1500 MSE = 4.804358
Epoch 1600 MSE = 4.8041
Epoch 1700 MSE = 4.8039064
Epoch 1800 MSE = 4.8037586
Epoch 1900 MSE = 4.8036466
Epoch 2000 MSE = 4.8035617
Epoch 2100 MSE = 4.8034954
Epoch 2200 MSE = 4.8034453
Epoch 2300 MSE = 4.803406
Epoch 2400 MSE = 4.8033743
Epoch 2500 MSE = 4.8033514
Epoch 2600 MSE = 4.803332
Epoch 2700 MSE = 4.8033166
Epoch 2800 MSE = 4.8033056
Epoch 2900 MSE = 4.803295
Epoch 3000 MSE = 4.803288
Epoch 3100 MSE = 4.8032813
Epoch 3200 MSE = 4.803276
Epoch 3300 MSE = 4.803272
Epoch 3400 MSE = 4.803269
Epoch 3500 MSE = 4.803266
Epoch 3600 MSE = 4.803264
Epoch 3700 MSE = 4.803262
Epoch 

### A good solution is found well before many iterations are ran.

## Unfortunately, manually inputting gradients for every line in a tensorflow calculation is extremely tedious. 
### Instead we may take advantage of the autodiff approach, which utilizes a reverse-mode autodiff, which AUTOMATICALLY and EFFICIENTLY computes gradients. 

# Sounds like a dream for an engineer who loves efficiency :)

In [33]:
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()

Epoch 0 MSE = 8.79953
Epoch 100 MSE = 4.979629
Epoch 200 MSE = 4.8882
Epoch 300 MSE = 4.8667197
Epoch 400 MSE = 4.851765
Epoch 500 MSE = 4.84056
Epoch 600 MSE = 4.8320932
Epoch 700 MSE = 4.825665
Epoch 800 MSE = 4.8207593
Epoch 900 MSE = 4.8169975
Epoch 1000 MSE = 4.814098
Epoch 1100 MSE = 4.811851
Epoch 1200 MSE = 4.810101
Epoch 1300 MSE = 4.8087316
Epoch 1400 MSE = 4.807654
Epoch 1500 MSE = 4.8068013
Epoch 1600 MSE = 4.8061237
Epoch 1700 MSE = 4.8055835
Epoch 1800 MSE = 4.805151
Epoch 1900 MSE = 4.8048024
Epoch 2000 MSE = 4.804521
Epoch 2100 MSE = 4.804293
Epoch 2200 MSE = 4.804108
Epoch 2300 MSE = 4.803956
Epoch 2400 MSE = 4.803833
Epoch 2500 MSE = 4.8037314
Epoch 2600 MSE = 4.803649
Epoch 2700 MSE = 4.8035808
Epoch 2800 MSE = 4.8035245
Epoch 2900 MSE = 4.8034782
Epoch 3000 MSE = 4.80344
Epoch 3100 MSE = 4.8034086
Epoch 3200 MSE = 4.8033824
Epoch 3300 MSE = 4.8033605
Epoch 3400 MSE = 4.8033423
Epoch 3500 MSE = 4.803327
Epoch 3600 MSE = 4.803315
Epoch 3700 MSE = 4.8033047
Epoch 3800 

## Simply replace the manual calculation of gradients line for the line

### gradients = tf.gradients(mse, [theta])[0]

# To make things EVEN BETTER tensorflow has built in optimizers, even one for gradient descent. Yep. Making things that much better.

## we may go and make adjustments to the prior code with a simple optimizer replace training_op

In [36]:
import time
start = time.time()
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = tf.gradients(mse, [theta])[0]
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
end = time.time()
print(end - start)

Epoch 0 MSE = 6.402991
Epoch 100 MSE = 4.87908
Epoch 200 MSE = 4.8554063
Epoch 300 MSE = 4.8408113
Epoch 400 MSE = 4.8303423
Epoch 500 MSE = 4.8227935
Epoch 600 MSE = 4.817348
Epoch 700 MSE = 4.8134203
Epoch 800 MSE = 4.810587
Epoch 900 MSE = 4.808544
Epoch 1000 MSE = 4.80707
Epoch 1100 MSE = 4.8060064
Epoch 1200 MSE = 4.805239
Epoch 1300 MSE = 4.804686
Epoch 1400 MSE = 4.804287
Epoch 1500 MSE = 4.8039994
Epoch 1600 MSE = 4.8037915
Epoch 1700 MSE = 4.8036413
Epoch 1800 MSE = 4.8035336
Epoch 1900 MSE = 4.8034554
Epoch 2000 MSE = 4.8033996
Epoch 2100 MSE = 4.803359
Epoch 2200 MSE = 4.80333
Epoch 2300 MSE = 4.803308
Epoch 2400 MSE = 4.8032937
Epoch 2500 MSE = 4.8032823
Epoch 2600 MSE = 4.8032746
Epoch 2700 MSE = 4.803269
Epoch 2800 MSE = 4.803264
Epoch 2900 MSE = 4.803262
Epoch 3000 MSE = 4.80326
Epoch 3100 MSE = 4.803258
Epoch 3200 MSE = 4.8032565
Epoch 3300 MSE = 4.8032565
Epoch 3400 MSE = 4.803255
Epoch 3500 MSE = 4.8032546
Epoch 3600 MSE = 4.8032546
Epoch 3700 MSE = 4.8032546
Epoch 38

### Simple, with just a few lines of code we utilize a built in optimizer. No big deal

### other optimizers may be used, for example, a momentum optimizer
# please note that often times momentum optimizers can be much faster for convergence than gradient descent

In [37]:
start = time.time()
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = tf.gradients(mse, [theta])[0]
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.75)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
end = time.time()
print(end - start)

Epoch 0 MSE = 13.002845
Epoch 100 MSE = 4.8472853
Epoch 200 MSE = 4.815852
Epoch 300 MSE = 4.8070564
Epoch 400 MSE = 4.804488
Epoch 500 MSE = 4.8036904
Epoch 600 MSE = 4.8034225
Epoch 700 MSE = 4.8033233
Epoch 800 MSE = 4.803285
Epoch 900 MSE = 4.803268
Epoch 1000 MSE = 4.8032603
Epoch 1100 MSE = 4.8032565
Epoch 1200 MSE = 4.803255
Epoch 1300 MSE = 4.8032546
Epoch 1400 MSE = 4.8032546
Epoch 1500 MSE = 4.8032537
Epoch 1600 MSE = 4.8032546
Epoch 1700 MSE = 4.803254
Epoch 1800 MSE = 4.803254
Epoch 1900 MSE = 4.803254
Epoch 2000 MSE = 4.803254
Epoch 2100 MSE = 4.803254
Epoch 2200 MSE = 4.803254
Epoch 2300 MSE = 4.8032537
Epoch 2400 MSE = 4.8032546
Epoch 2500 MSE = 4.8032537
Epoch 2600 MSE = 4.803254
Epoch 2700 MSE = 4.803254
Epoch 2800 MSE = 4.803254
Epoch 2900 MSE = 4.8032537
Epoch 3000 MSE = 4.803254
Epoch 3100 MSE = 4.803254
Epoch 3200 MSE = 4.803254
Epoch 3300 MSE = 4.803254
Epoch 3400 MSE = 4.803254
Epoch 3500 MSE = 4.803254
Epoch 3600 MSE = 4.803254
Epoch 3700 MSE = 4.803254
Epoch 38

# you betcha, more than 25%  faster for convergence using a momentum optimizer

# Implementing batch gradient descent with tensorflow
## to implement Mini-batch GD, we must replace X and y at every iteration.
### in order to do so, placeholder nodes can be utilized. They just output the data we tell them to output at runtime.

## in practice, they are typically used to pass training data to TensorFlow during training a value must be specified at runtime, otherwise an exception occurs

### specifying ,None, for a dimension means that the placeholder node can be any size

# Example for Placeholder B = A * 5
# C = A + 5

In [39]:
A = tf.placeholder(tf.float32, shape=(None, 3))
B = A * 5
C = A + 5
with tf.Session() as sess:
    B_val_1 = B.eval(feed_dict={A: [[1, 2, 3]]})
    B_val_2 = B.eval(feed_dict={A: [[4, 5, 6], [7, 8, 9]]})
    C_val_1 = C.eval(feed_dict={A: [[1, 2, 3]]})
    C_val_2 = C.eval(feed_dict={A: [[4, 5, 6], [7, 8, 9]]})


print(B_val_1)

print(B_val_2)

print(C_val_1)

print(C_val_2)


[[ 5. 10. 15.]]
[[20. 25. 30.]
 [35. 40. 45.]]
[[6. 7. 8.]]
[[ 9. 10. 11.]
 [12. 13. 14.]]


## For mini-batch gradient descent we need to first make X and y placeholder nodes

In [40]:
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

batch_size = 5000
n_batches = int(np.ceil(m / batch_size))

#### Note: recall ceil is for rounding up integers.
## For the execution, feed in mini-batches one by one, provide the value of X and y via feed_dict parameter

In [44]:
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
batch_size = 5000
n_batches = int(np.ceil(m / batch_size))
def fetch_batch(epoch, batch_index, batch_size):
    [...] # whatever the directory with data would be
    return X_batch, y_batch
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
best_theta = theta.eval()

NameError: name 'X_batch' is not defined