# Linear Regression with Tensorflow

Here the TensorFlow Library will be used to built a regression model using the Boston Housing Dataset. Gradient Decent and Mini-Batch Gradient Decent will be applied as estimators. 

## 1. Using the Normal Equation

In [0]:
import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
import numpy as np


In [0]:
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler

In [3]:
housing = fetch_california_housing()

Downloading Cal. housing from https://ndownloader.figshare.com/files/5976036 to /root/scikit_learn_data


In [0]:
m, n = housing.data.shape

add an extra bias input feature $ x_0 = 1 $

In [0]:
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

From probabilistic view we can write the model like: $ p(y|x, \theta) = \mathcal{N}(y|\mu(x),  \sigma^2(x) ) $. Assuming that $ \mu $ is a linear function of $x$, so $ \mu = w^Tx $, and that the noise is fixed, $ \sigma^2(x) = \sigma^2$, we have as model parameters: $ \theta = (w, \sigma^2) $ 

Then we create two TensorFlow constant nodes, $ X $ and $ y $, to hold this
data and the targets. 

To estimate it (in the multivariate case) we use that   $ \tilde{\theta} = (X^TX)^{-1}X^Ty $.   For the sake of this we use matrix operation provided by TF :

In [0]:
X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

Create session and use it to evaluate $ \theta $

In [0]:
with tf.Session() as sess:
  theta_value = theta.eval()

In [0]:
theta_value

array([[-3.7112991e+01],
       [ 4.3611991e-01],
       [ 9.4082914e-03],
       [-1.0654381e-01],
       [ 6.4201808e-01],
       [-4.0360574e-06],
       [-3.7822633e-03],
       [-4.2303962e-01],
       [-4.3648642e-01]], dtype=float32)

## 2. Use Batch Gradient Descent

In [0]:
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]

### Manually computing the gradients

In [0]:
n_epochs = 1000
learning_rate = 0.01


X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)
init = tf.global_variables_initializer()

with tf.Session() as sess:
  sess.run(init)
  
  for epoch in range(n_epochs):
    if epoch % 100 == 0:
      print("Epoch", epoch, "MSE =", mse.eval())
    sess.run(training_op)
    
  best_theta = theta.eval()

Epoch 0 MSE = 14.956574
Epoch 100 MSE = 0.80838776
Epoch 200 MSE = 0.6133018
Epoch 300 MSE = 0.5873682
Epoch 400 MSE = 0.57049286
Epoch 500 MSE = 0.55823433
Epoch 600 MSE = 0.54929143
Epoch 700 MSE = 0.5427558
Epoch 800 MSE = 0.53797114
Epoch 900 MSE = 0.5344607


In [0]:
print(best_theta)

[[ 2.0685523 ]
 [ 0.89949673]
 [ 0.1579444 ]
 [-0.34874737]
 [ 0.35304967]
 [ 0.00884679]
 [-0.04413138]
 [-0.5485535 ]
 [-0.5249807 ]]


## 3. Use Gradient Descent (with Optimizer)

In [0]:
tf.reset_default_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")



optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)



init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    
    best_theta = theta.eval()

print("Best theta:")
print(best_theta)



Epoch 0 MSE = 2.7544262
Epoch 100 MSE = 0.632222
Epoch 200 MSE = 0.57278043
Epoch 300 MSE = 0.5585007
Epoch 400 MSE = 0.54907
Epoch 500 MSE = 0.54228795
Epoch 600 MSE = 0.53737885
Epoch 700 MSE = 0.53382194
Epoch 800 MSE = 0.53124255
Epoch 900 MSE = 0.5293704
Best theta:
[[ 2.0685525e+00]
 [ 7.7407807e-01]
 [ 1.3119237e-01]
 [-1.1784508e-01]
 [ 1.6477816e-01]
 [ 7.4407790e-04]
 [-3.9194509e-02]
 [-8.6135662e-01]
 [-8.2347977e-01]]


### 4. Use Mini-Batch Gradient Decent (via Placeholders)

In [0]:
tf.reset_default_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")



optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()


n_epochs = 10

batch_size = 100
n_batches = int(np.ceil(m / batch_size))

def fetch_batch(epoch, batch_index, batch_size):
    np.random.seed(epoch * n_batches + batch_index)  # not shown in the book
    indices = np.random.randint(m, size=batch_size)  # not shown
    X_batch = scaled_housing_data_plus_bias[indices] # not shown
    y_batch = housing.target.reshape(-1, 1)[indices] # not shown
    return X_batch, y_batch

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

    best_theta = theta.eval()


In [10]:
best_theta

array([[ 2.070016  ],
       [ 0.8204561 ],
       [ 0.11731729],
       [-0.22739056],
       [ 0.31134024],
       [ 0.00353192],
       [-0.01126995],
       [-0.9164395 ],
       [-0.8795009 ]], dtype=float32)