<a href="https://colab.research.google.com/github/jackiekuen2/notes-handson-ml-tf/blob/master/ch9_TensorFlow_GradientDescent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# a = []
# while(1):
#     a.append("1")

In [0]:
%tensorflow_version 1.x

import tensorflow as tf

print(tf.__version__)

TensorFlow 1.x selected.
1.15.2


In [0]:
import numpy as np
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()

Downloading Cal. housing from https://ndownloader.figshare.com/files/5976036 to /root/scikit_learn_data


In [0]:
housing.keys()

dict_keys(['data', 'target', 'feature_names', 'DESCR'])

In [0]:
housing.feature_names

['MedInc',
 'HouseAge',
 'AveRooms',
 'AveBedrms',
 'Population',
 'AveOccup',
 'Latitude',
 'Longitude']

## First, Feature Scaling before training

In [0]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
m, n = scaled_housing_data.shape

scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]

In [0]:
print(m, n)

20640 8


In [0]:
# Reset graph
tf.reset_default_graph()

## Method 1. Manually Computing the Grdients

In [0]:
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')

# Randomly initialize a tensor theta, tf.random_uniform() similar to np.rand()
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0), name='theta')

y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y

# Mean Square Error, tf.reduce_mean similar to np.mean()
mse = tf.reduce_mean(tf.square(error), name='mse')

gradients = 2/m * tf.matmul(tf.transpose(X), error)

# Update theta
training_op = tf.assign(theta, theta - learning_rate*gradients)

# Initialize global variables
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0: # Print out MSE every 100 iterations
            print("Epoch {} MSE={}".format(epoch, mse.eval()))
        sess.run(training_op)
    
    best_theta = theta.eval()

Epoch 0 MSE=11.618226051330566
Epoch 100 MSE=1.0208406448364258
Epoch 200 MSE=0.719840407371521
Epoch 300 MSE=0.6597663164138794
Epoch 400 MSE=0.6219435334205627
Epoch 500 MSE=0.5949181914329529
Epoch 600 MSE=0.5754101276397705
Epoch 700 MSE=0.5613120794296265
Epoch 800 MSE=0.5511195659637451
Epoch 900 MSE=0.5437483191490173


In [0]:
best_theta

array([[ 2.0685523 ],
       [ 0.7329799 ],
       [ 0.14655156],
       [ 0.00294576],
       [ 0.04592052],
       [ 0.0068636 ],
       [-0.03961714],
       [-0.787969  ],
       [-0.7432216 ]], dtype=float32)

In [0]:
tf.reset_default_graph()

## Method 2. Using autodiff

In [0]:
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')

# Randomly initialize a tensor theta, tf.random_uniform() similar to np.rand()
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0), name='theta')

y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y

# Mean Square Error, tf.reduce_mean similar to np.mean()
mse = tf.reduce_mean(tf.square(error), name='mse')

# Using autodiff: Compute gradient vectors of the MSE w.r.t theta
# gradients = 2/m * tf.matmul(tf.transpose(X), error)
gradients = tf.gradients(mse, [theta])[0]

# Update theta
training_op = tf.assign(theta, theta - learning_rate*gradients)

# Initialize global variables
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0: # Print out MSE every 100 iterations
            print("Epoch {} MSE={}".format(epoch, mse.eval()))
        sess.run(training_op)
    
    best_theta = theta.eval()

print("Best theta: ", best_theta)

Epoch 0 MSE=3.9721925258636475
Epoch 100 MSE=0.8189903497695923
Epoch 200 MSE=0.7060704827308655
Epoch 300 MSE=0.6669353246688843
Epoch 400 MSE=0.6382347941398621
Epoch 500 MSE=0.615772545337677
Epoch 600 MSE=0.5980274677276611
Epoch 700 MSE=0.5839371681213379
Epoch 800 MSE=0.5726980566978455
Epoch 900 MSE=0.5636948943138123
Best theta:  [[ 2.0685525 ]
 [ 0.9796639 ]
 [ 0.17247641]
 [-0.5011466 ]
 [ 0.4794834 ]
 [ 0.01309427]
 [-0.04707039]
 [-0.3673576 ]
 [-0.3531609 ]]


In [0]:
tf.reset_default_graph()

## Method 3. Using an Optimizer (#1 Gradient Descent Optimizer)

In [0]:
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')

# Randomly initialize a tensor theta, tf.random_uniform() similar to np.rand()
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0), name='theta')

y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y

# Mean Square Error, tf.reduce_mean similar to np.mean()
mse = tf.reduce_mean(tf.square(error), name='mse')

# Using GradientDescentOptimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)

# Minimize MSE
training_op = optimizer.minimize(mse)

# Initialize global variables
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0: # Print out MSE every 100 iterations
            print("Epoch {} MSE={}".format(epoch, mse.eval()))
        sess.run(training_op)
    
    best_theta = theta.eval()

print("Best theta: ", best_theta)

Epoch 0 MSE=6.178981781005859
Epoch 100 MSE=0.7283284068107605
Epoch 200 MSE=0.646472156047821
Epoch 300 MSE=0.6169953346252441
Epoch 400 MSE=0.5954747796058655
Epoch 500 MSE=0.5792500376701355
Epoch 600 MSE=0.5669466853141785
Epoch 700 MSE=0.5575711131095886
Epoch 800 MSE=0.5503908395767212
Epoch 900 MSE=0.544863760471344
Best theta:  [[ 2.0685525 ]
 [ 0.9035668 ]
 [ 0.16156963]
 [-0.35108522]
 [ 0.3526298 ]
 [ 0.01011347]
 [-0.044525  ]
 [-0.51831335]
 [-0.49495554]]


In [0]:
tf.reset_default_graph()

## Method 3. Using an Optimizer (#2 Momentum Optimizer)

In [0]:
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')

# Randomly initialize a tensor theta, tf.random_uniform() similar to np.rand()
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0), name='theta')

y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y

# Mean Square Error, tf.reduce_mean similar to np.mean()
mse = tf.reduce_mean(tf.square(error), name='mse')

# Using MomentumOptimizer
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                       momentum=0.9)

# Minimize MSE
training_op = optimizer.minimize(mse)

# Initialize global variables
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0: # Print out MSE every 100 iterations
            print("Epoch {} MSE={}".format(epoch, mse.eval()))
        sess.run(training_op)
    
    best_theta = theta.eval()

print("Best theta: ", best_theta)

Epoch 0 MSE=12.779354095458984
Epoch 100 MSE=0.5309959053993225
Epoch 200 MSE=0.5249325037002563
Epoch 300 MSE=0.5243979692459106
Epoch 400 MSE=0.5243311524391174
Epoch 500 MSE=0.5243222713470459
Epoch 600 MSE=0.5243211984634399
Epoch 700 MSE=0.5243210196495056
Epoch 800 MSE=0.5243210196495056
Epoch 900 MSE=0.5243209600448608
Best theta:  [[ 2.0685577 ]
 [ 0.829628  ]
 [ 0.11875325]
 [-0.2655432 ]
 [ 0.30570975]
 [-0.00450255]
 [-0.03932659]
 [-0.8998659 ]
 [-0.8705223 ]]


In [0]:
tf.reset_default_graph()

# Saving Models



In [0]:
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')

# Randomly initialize a tensor theta, tf.random_uniform() similar to np.rand()
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0), name='theta')

y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y

# Mean Square Error, tf.reduce_mean similar to np.mean()
mse = tf.reduce_mean(tf.square(error), name='mse')

# Using GradientDescentOptimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)

# Minimize MSE
training_op = optimizer.minimize(mse)

# Initialize global variables
init = tf.global_variables_initializer()

# Create Saver node
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0: # Print out MSE every 100 iterations
            print("Epoch {} MSE={}".format(epoch, mse.eval()))
            save_path = saver.save(sess, "/tmp/my_model.ckpt") # Save checkpoints
        sess.run(training_op)
    
    best_theta = theta.eval()
    save_path = saver.save(sess, "/tmp/my_model_final.ckpt")

print("Best theta: ", best_theta)

Epoch 0 MSE=13.226840019226074
Epoch 100 MSE=0.7598133087158203
Epoch 200 MSE=0.5662651658058167
Epoch 300 MSE=0.5548070073127747
Epoch 400 MSE=0.5485891103744507
Epoch 500 MSE=0.5437779426574707
Epoch 600 MSE=0.5399841666221619
Epoch 700 MSE=0.5369762778282166
Epoch 800 MSE=0.5345802307128906
Epoch 900 MSE=0.5326635241508484
Best theta:  [[ 2.0685525 ]
 [ 0.898035  ]
 [ 0.14359006]
 [-0.3723234 ]
 [ 0.38412875]
 [ 0.00364512]
 [-0.04288624]
 [-0.6545801 ]
 [-0.6321115 ]]


In [0]:
tf.reset_default_graph()

# Restoring Models

## 1. Restoring varialbes only

In [0]:
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')

# Randomly initialize a tensor theta, tf.random_uniform() similar to np.rand()
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0), name='theta')

y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y

# Mean Square Error, tf.reduce_mean similar to np.mean()
mse = tf.reduce_mean(tf.square(error), name='mse')

# Using GradientDescentOptimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)

# Minimize MSE
training_op = optimizer.minimize(mse)

# Initialize global variables
init = tf.global_variables_initializer()

saver = tf.train.Saver()

with tf.Session() as sess:
    saver.restore(sess, "/tmp/my_model_final.ckpt")
    best_theta_restored = theta.eval()

INFO:tensorflow:Restoring parameters from /tmp/my_model_final.ckpt


In [0]:
best_theta_restored

array([[ 2.0685525 ],
       [ 0.898035  ],
       [ 0.14359006],
       [-0.3723234 ],
       [ 0.38412875],
       [ 0.00364512],
       [-0.04288624],
       [-0.6545801 ],
       [-0.6321115 ]], dtype=float32)

In [0]:
tf.reset_default_graph()

## 2. Restore both the graph structure and trained variables

In [0]:
# Restore the graph structure
saver = tf.train.import_meta_graph("/tmp/my_model_final.ckpt.meta")

# Still need to define theta
theta = tf.get_default_graph().get_tensor_by_name("theta:0")

with tf.Session() as sess:
    saver.restore(sess, "/tmp/my_model_final.ckpt") # Restore trained variables
    best_theta_restored = theta.eval()

INFO:tensorflow:Restoring parameters from /tmp/my_model_final.ckpt


In [0]:
best_theta_restored

array([[ 2.0685525 ],
       [ 0.898035  ],
       [ 0.14359006],
       [-0.3723234 ],
       [ 0.38412875],
       [ 0.00364512],
       [-0.04288624],
       [-0.6545801 ],
       [-0.6321115 ]], dtype=float32)