# Linear Regression with TensorFlow

## Linear Regression using Normal Equation

Determine $\hat{\theta}$ directly using the Normal Equation:

$$
\hat{\theta} = (X^T \cdot X)^{-1} \cdot X^T \cdot y .
$$

In [68]:
import numpy as np
import tensorflow as tf
from sklearn.datasets import fetch_california_housing

# Load the California housing dataset
housing = fetch_california_housing()

# m = number of rows (samples), n = number of columns (features)
m, n = housing.data.shape
(m, n)

(20640, 8)

In [4]:
# Print the feature vector of the first sample ...
print(housing.data[0, :])

# ... and leave the array of target values as the cell's displayed result
housing.target

[   8.3252       41.            6.98412698    1.02380952  322.
    2.55555556   37.88       -122.23      ]


array([ 4.526,  3.585,  3.521, ...,  0.923,  0.847,  0.894])

In [5]:
# Prepend a column of ones (the bias input x0 = 1) to the feature matrix,
# then show the first row to confirm the leading 1.
housing_data_plus_bias = np.hstack([np.ones((m, 1)), housing.data])
housing_data_plus_bias[0]

array([   1.        ,    8.3252    ,   41.        ,    6.98412698,
          1.02380952,  322.        ,    2.55555556,   37.88      , -122.23      ])

In [7]:
# Wrap the NumPy data in constant nodes: the design matrix (bias column
# included) and the targets reshaped into a single column.
X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name="y")
XT = tf.transpose(X)

# Normal equation, built in two steps: theta = (X^T X)^-1 X^T y
gram_inverse = tf.matrix_inverse(tf.matmul(XT, X))
theta = tf.matmul(tf.matmul(gram_inverse, XT), y)

In [8]:
# Evaluate the theta node inside a session; the with-block closes the
# session once the value has been fetched.
with tf.Session() as sess:
    theta_value = sess.run(theta)

theta_value

array([[ -3.74651413e+01],
       [  4.35734153e-01],
       [  9.33829229e-03],
       [ -1.06622010e-01],
       [  6.44106984e-01],
       [ -4.25131839e-06],
       [ -3.77322501e-03],
       [ -4.26648885e-01],
       [ -4.40514028e-01]], dtype=float32)

In [62]:
# Clear the default graph so the nodes created above (X, y, XT, theta) do not
# carry over into the next section, which defines its own X, y and theta.
tf.reset_default_graph()

## Linear Regression using Batch Gradient Descent

In [31]:
# Gradient Descent requires scaling the feature vectors first
from sklearn.preprocessing import StandardScaler

# Standardize each feature column, then prepend the bias column of ones
# (added after scaling so that it stays exactly 1).
scaler = StandardScaler()
scaled_housing_data = scaler.fit(housing.data).transform(housing.data)
scaled_housing_data_plus_bias = np.hstack([np.ones((m, 1)), scaled_housing_data])

In [70]:
n_epochs = 2000
learning_rate = 0.008
# Fixed seed for the random initial weights, so that re-running the notebook
# reproduces the same training trajectory and the same final theta.
random_seed = 42

# Constant nodes: the scaled features (bias column included) and the targets
# reshaped into a single column. Because X holds the whole dataset, every
# training step below computes the gradient over all samples.
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name="y")
# Define theta variable: one weight per feature plus the bias, initialised
# uniformly in [-1, 1) with a seeded generator for reproducibility
theta = tf.Variable(
    tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=random_seed), name="theta")
# Predictions
y_pred = tf.matmul(X, theta, name="predictions")
# Error: predicted values - actual values
error = y_pred - y
# Mean Square Error
mse = tf.reduce_mean(tf.square(error), name="mse")

# TensorFlow computes the gradients for you and
# provides a number of optimizers out of the box,
# including a Gradient Descent optimizer.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

# Set init node to initialize variables later
init = tf.global_variables_initializer()

# Start a session
with tf.Session() as sess:
    sess.run(init) # initialize variables

    for epoch in range(n_epochs):
        # Report progress every 100 epochs; the MSE shown is the value
        # *before* this epoch's update is applied.
        if epoch % 100 == 0:
            print("Epoch ", epoch, "MSE =", mse.eval())
        sess.run(training_op) # run training operation

    # Fetch the trained weights while the session is still open
    best_theta = theta.eval()

print("\nBest theta: \n", best_theta)

Epoch  0 MSE = 13.061
Epoch  100 MSE = 1.22624
Epoch  200 MSE = 0.73448
Epoch  300 MSE = 0.674533
Epoch  400 MSE = 0.641452
Epoch  500 MSE = 0.616325
Epoch  600 MSE = 0.596765
Epoch  700 MSE = 0.581487
Epoch  800 MSE = 0.569535
Epoch  900 MSE = 0.56017
Epoch  1000 MSE = 0.552818
Epoch  1100 MSE = 0.547036
Epoch  1200 MSE = 0.542478
Epoch  1300 MSE = 0.538878
Epoch  1400 MSE = 0.536029
Epoch  1500 MSE = 0.533766
Epoch  1600 MSE = 0.531965
Epoch  1700 MSE = 0.530529
Epoch  1800 MSE = 0.529378
Epoch  1900 MSE = 0.528456

Best theta: 
 [[ 2.06855083]
 [ 0.85623604]
 [ 0.13885899]
 [-0.28754085]
 [ 0.31146556]
 [ 0.00246625]
 [-0.04159471]
 [-0.72831511]
 [-0.70070046]]
