### Implement Linear Regression by Normal Equation and Gradient Descent

In [1]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import fetch_california_housing
import sklearn

In [2]:
housing = fetch_california_housing()

### Linear regreesion with Normal Equation 

In [3]:
m, n = housing.data.shape

In [4]:
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

In [5]:
X = tf.constant(housing_data_plus_bias, dtype = tf.float32, name = "X")

In [6]:
y = tf.constant(housing.target.reshape(-1, 1), dtype = tf.float32, name = "y")

In [7]:
XT = tf.transpose(X)

In [8]:
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

In [9]:
with tf.Session() as sess:
    theta_value = theta.eval()

In [10]:
theta_value

array([[-3.7465141e+01],
       [ 4.3573415e-01],
       [ 9.3382923e-03],
       [-1.0662201e-01],
       [ 6.4410698e-01],
       [-4.2513184e-06],
       [-3.7732250e-03],
       [-4.2664889e-01],
       [-4.4051403e-01]], dtype=float32)

### Linear regression with Gradient Descent by manually compute gradients

In [11]:
n_epochs = 1000
learning_rate = 0.01

In [12]:
scaled_housing_data = sklearn.preprocessing.normalize(housing.data)
m, n = housing.data.shape
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]

In [13]:
X = tf.constant(scaled_housing_data_plus_bias, dtype = tf.float32, name = "X")
y = tf.constant(housing.target.reshape(-1, 1), dtype = tf.float32, name = "y")
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0), name = "theta")

In [14]:
y_pred = tf.matmul(X, theta, name = "predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name = "mse")
gradients = 2/m*tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate*gradients)

In [15]:
init = tf.global_variables_initializer()

In [16]:
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch%100 ==0:
            print("Epoch", epoch, "MSE = ", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()

Epoch 0 MSE =  7.7349157
Epoch 100 MSE =  1.3356388
Epoch 200 MSE =  1.3333036
Epoch 300 MSE =  1.3329794
Epoch 400 MSE =  1.3326784
Epoch 500 MSE =  1.3323984
Epoch 600 MSE =  1.3321372
Epoch 700 MSE =  1.331894
Epoch 800 MSE =  1.3316674
Epoch 900 MSE =  1.331456


### Linear regression with Gradient Descent by tensorflow gradient compute function

In [17]:
n_epochs = 1000
learning_rate = 0.01

In [18]:
scaled_housing_data = sklearn.preprocessing.normalize(housing.data)
m, n = housing.data.shape
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]

In [19]:
X = tf.constant(scaled_housing_data_plus_bias, dtype = tf.float32, name = "X")
y = tf.constant(housing.target.reshape(-1, 1), dtype = tf.float32, name = "y")
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0), name = "theta")

In [20]:
y_pred = tf.matmul(X, theta, name = "predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name = "mse")
gradients = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate*gradients)

In [21]:
init = tf.global_variables_initializer()

In [22]:
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch%100 ==0:
            print("Epoch", epoch, "MSE = ", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()

Epoch 0 MSE =  2.838192
Epoch 100 MSE =  1.3319263
Epoch 200 MSE =  1.3314254
Epoch 300 MSE =  1.3313923
Epoch 400 MSE =  1.3313594
Epoch 500 MSE =  1.3313264
Epoch 600 MSE =  1.3312936
Epoch 700 MSE =  1.3312608
Epoch 800 MSE =  1.3312279
Epoch 900 MSE =  1.3311952


### Linear regression with Gradient Descent by optimizer

In [23]:
n_epochs = 1000
learning_rate = 0.01

In [24]:
scaled_housing_data = sklearn.preprocessing.normalize(housing.data)
m, n = housing.data.shape
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]

In [25]:
X = tf.constant(scaled_housing_data_plus_bias, dtype = tf.float32, name = "X")
y = tf.constant(housing.target.reshape(-1, 1), dtype = tf.float32, name = "y")
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0), name = "theta")

In [26]:
y_pred = tf.matmul(X, theta, name = "predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name = "mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)
training_op = optimizer.minimize(mse)

In [27]:
init = tf.global_variables_initializer()

In [28]:
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch%100 ==0:
            print("Epoch", epoch, "MSE = ", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()

Epoch 0 MSE =  8.967875
Epoch 100 MSE =  1.3439347
Epoch 200 MSE =  1.3408414
Epoch 300 MSE =  1.3401685
Epoch 400 MSE =  1.3395452
Epoch 500 MSE =  1.3389674
Epoch 600 MSE =  1.338432
Epoch 700 MSE =  1.3379349
Epoch 800 MSE =  1.3374738
Epoch 900 MSE =  1.3370456


### Linear regression with MIni-batch Gradient Descent

In [29]:
n_epochs = 10
learning_rate = 0.01

In [30]:
housing = fetch_california_housing()
m, n = housing.data.shape

In [31]:
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

In [32]:
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

In [33]:
batch_size = 100
n_batches = int(np.ceil(m / batch_size))

In [34]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]

In [35]:
def fetch_batch(epoch, batch_index, batch_size):
    np.random.seed(epoch * n_batches + batch_index)  
    indices = np.random.randint(m, size=batch_size)
    X_batch = scaled_housing_data_plus_bias[indices] 
    y_batch = housing.target.reshape(-1, 1)[indices] 
    return X_batch, y_batch

In [36]:
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

    best_theta = theta.eval()

In [37]:
best_theta

array([[ 2.070016  ],
       [ 0.82045615],
       [ 0.11731729],
       [-0.22739057],
       [ 0.31134027],
       [ 0.00353193],
       [-0.01126994],
       [-0.91643935],
       [-0.8795008 ]], dtype=float32)