<a href="https://colab.research.google.com/github/hamk3010/DeepLearning/blob/master/Linear%20Regression%20with%20TensorFlow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

These ideas were adopted from [Hands-On Machine Learning with Scikit-Learn & Tensorflow](http://index-of.es/Varios-2/Hands%20on%20Machine%20Learning%20with%20Scikit%20Learn%20and%20Tensorflow.pdf). Tensorflow version used to run this code was: 1.13.0-rc0

In [3]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import fetch_california_housing

# Fetch the housing dataset
housing = fetch_california_housing()
m,n = housing.data.shape

# Add a bias input feature (x0 = 1) to all training instances
housing_data_plus_bias = np.c_[np.ones((m,1)), housing.data]

Downloading Cal. housing from https://ndownloader.figshare.com/files/5976036 to /root/scikit_learn_data


In [4]:
 print(tf.__version__)

1.13.0-rc0


In [0]:
#  Construction phase
# Create two TensorFlow constant nodes, X and y to hold this data and the targets
X = tf.constant(housing_data_plus_bias, dtype= tf.float32, name = "X")
y = tf.constant(housing.target.reshape(-1,1), dtype= tf.float32, name = "y")
XT = tf.transpose(X)
theta = tf.matmul( tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

In [0]:
# Execution phase

with tf.Session() as sess:
    theta_value = theta.eval()

In [7]:
print(theta_value)

[[-3.71037292e+01]
 [ 4.36282694e-01]
 [ 9.40542948e-03]
 [-1.06901854e-01]
 [ 6.43611908e-01]
 [-4.06625077e-06]
 [-3.78273334e-03]
 [-4.23094332e-01]
 [-4.36462164e-01]]


In [0]:
# Lets normalize the input feature vectors first
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(housing_data_plus_bias)
scaled_housing_data_plus_bias = scaler.transform(housing_data_plus_bias)

# Implementing Gradient Descent 🤖
We will implement gradient descent by first manually 🏋️‍♀️computing the gradients, then we will use *autodiff* to compute the gradients automatically. Additioanlly, we will also use optimizers that come with TensorFlow.

In [0]:
#  Construction phase
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype= tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1,1), dtype= tf.float32, name = "Y")

# Create a tensor containing random values 
theta = tf.Variable(tf.random_uniform([ n + 1 , 1 ], -1.0, 1.0), name = "theta")
y_pred = tf.matmul(X, theta, name = "predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name = "mse")
# Manually compute gradients
gradients = 2/m * tf.matmul(tf.transpose(X), error)

# assign() function creates a new node that will assign a new value to a variable
#  theta[n] = theta for the next step
#  theta = theta in this step
#  
# Over here it is impplementing Batch Gradient Descent step
#  theta[n] = theta - learning_rate * gradients
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()


In [12]:
with tf.Session() as sess:
  sess.run(init)
  
  for epoch in range(n_epochs):
    if(epoch%100 == 0):
      print("Epoch", epoch, "MSE = ", mse.eval() )
      sess.run(training_op)
  best_theta = theta.eval()

Epoch 0 MSE =  8.86133
Epoch 100 MSE =  8.631515
Epoch 200 MSE =  8.416905
Epoch 300 MSE =  8.216426
Epoch 400 MSE =  8.029071
Epoch 500 MSE =  7.85392
Epoch 600 MSE =  7.690111
Epoch 700 MSE =  7.5368505
Epoch 800 MSE =  7.393403
Epoch 900 MSE =  7.259084


### Using Autodiff  to calculate gradients with the same exact code. 🚘

In [0]:
#  Construction phase
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype= tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1,1), dtype= tf.float32, name = "Y")

# Create a tensor containing random values 
theta = tf.Variable(tf.random_uniform([ n + 1 , 1 ], -1.0, 1.0), name = "theta")
y_pred = tf.matmul(X, theta, name = "predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name = "mse")

# Use Autodiff to compute gradients
gradients = tf.gradients(mse, [theta])[0]                # <-- CHANGE MADE HERE

# assign() function creates a new node that will assign a new value to a variable
#  theta[n] = theta for the next step
#  theta = theta in this step
#  
# Over here it is impplementing Batch Gradient Descent step
#  theta[n] = theta - learning_rate * gradients
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

In [14]:
with tf.Session() as sess:
  sess.run(init)
  
  for epoch in range(n_epochs):
    if(epoch%100 == 0):
      print("Epoch", epoch, "MSE = ", mse.eval() )
      sess.run(training_op)
  best_theta = theta.eval()

Epoch 0 MSE =  7.6984057
Epoch 100 MSE =  7.5468493
Epoch 200 MSE =  7.404633
Epoch 300 MSE =  7.2711377
Epoch 400 MSE =  7.1457915
Epoch 500 MSE =  7.028057
Epoch 600 MSE =  6.917439
Epoch 700 MSE =  6.81347
Epoch 800 MSE =  6.7157197
Epoch 900 MSE =  6.623784


### Using a Gradient Descent Optimizer 🎢

In [0]:
#  Construction phase
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype= tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1,1), dtype= tf.float32, name = "Y")

# Create a tensor containing random values 
theta = tf.Variable(tf.random_uniform([ n + 1 , 1 ], -1.0, 1.0), name = "theta")
y_pred = tf.matmul(X, theta, name = "predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name = "mse")

# Use an optimizer to compute gradients
optimizer = tf.train.GradientDescentOptimizer(learning_rate= learning_rate)               # <-- CHANGE MADE HERE
training_op = optimizer.minimize(mse)                                                     # <-- CHANGE MADE HERE

init = tf.global_variables_initializer()

In [16]:
with tf.Session() as sess:
  sess.run(init)
  
  for epoch in range(n_epochs):
    if(epoch%100 == 0):
      print("Epoch", epoch, "MSE = ", mse.eval() )
      sess.run(training_op)
  best_theta = theta.eval()

Epoch 0 MSE =  7.9225955
Epoch 100 MSE =  7.7412086
Epoch 200 MSE =  7.5724034
Epoch 300 MSE =  7.4152513
Epoch 400 MSE =  7.268899
Epoch 500 MSE =  7.1325545
Epoch 600 MSE =  7.0054884
Epoch 700 MSE =  6.887023
Epoch 800 MSE =  6.7765346
Epoch 900 MSE =  6.6734467


## Feeding Data to the training algorithm and implementing Mini-batch Gradient Descent 🛰
In order to implement Mini-batch Gradient Descent we would need to replace X and y at every iteration with the next mini-batch. One way this can be done is by using a **placeholder node.** 💾

A **placeholder node** don't actually perform any computation, they just output data we ask tell them to output during runtime. They are typically used to pass the training data to TensorFlow during training.



In [0]:
#  Construction phase
n_epochs = 1000
learning_rate = 0.01

# To implement Mini-batch Gradient Descent we need to first change the 
# definition oX and y in the construction phase to make them placeholder nodes

X= tf.placeholder(tf.float32, shape=(None, n+1), name = "X")
y = tf.placeholder(tf.float32, shape=(None, 1), name= "y")

# Create a tensor containing random values 
theta = tf.Variable(tf.random_uniform([ n + 1 , 1 ], -1.0, 1.0), name = "theta")
y_pred = tf.matmul(X, theta, name = "predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name = "mse")

# Use an optimizer to compute gradients
optimizer = tf.train.GradientDescentOptimizer(learning_rate= learning_rate)              
training_op = optimizer.minimize(mse)                                                     

init = tf.global_variables_initializer()

#  define batch size and compute total number of batches
batch_size = 100
n_batches = int(np.ceil(m / batch_size) )


In [0]:
### Dividing the batches phase

# def fetch_batch(epoch, batch_index, batch_size):
#   [...] loading the data from disk step
#  return X_batch, y_batch

In [0]:
# Execution phase

with tf.Session() as sess:
  sess.run(init)
  
  for epoch in range(n_epochs):
    for batch_index in range(n_batches):
      X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size) #<-- fetch batch needs to be implemented above.
      sess.run(training_op, feed_dict={X: X_batch, y:y_batch})
      
  best_theta = theta.eval()

# Saving and Restoring Models 💾
Saving a model on Tensorlfow is done by creating a Saver node at the **end** of the construction phase. Then in the execution phase, we can just call its *save()*  method whenever we want to save the mode, passing the session and path of the checkpoint file.

In [0]:
#  Construction phase
n_epochs = 1000
learning_rate = 0.01

# To implement Mini-batch Gradient Descent we need to first change the 
# definition oX and y in the construction phase to make them placeholder nodes

X= tf.placeholder(tf.float32, shape=(None, n+1), name = "X")
y = tf.placeholder(tf.float32, shape=(None, 1), name= "y")

# Create a tensor containing random values 
theta = tf.Variable(tf.random_uniform([ n + 1 , 1 ], -1.0, 1.0), name = "theta")
y_pred = tf.matmul(X, theta, name = "predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name = "mse")

# Use an optimizer to compute gradients
optimizer = tf.train.GradientDescentOptimizer(learning_rate= learning_rate)              
training_op = optimizer.minimize(mse)                                                     

init = tf.global_variables_initializer()

saver = tf.train.Saver()

#  Execution
with tf.Session() as sess:
  sess.run(init)
  
  for epoch in range(n_epochs):
    if(epoch%100 == 0):
      save_path = saver.save(sess, "/tmp/my_model.ckpt")  
      
    sess.run(training_op)
  best_theta = theta.eval()
  save_path = saver.save(sess, "/tmp/my_model.ckpt") 

In [0]:
#  The model can be restored instead of calling the init node, we can just 
# call the restore method of the Saver object.

with tf.Session() as sess:
  saver.restore(sess, "/tmp/my_model_final.ckpt")

