# Linear regression with minibatch gradient descent 

In [1]:
import numpy as np
import tensorflow as tf
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler

  from ._conv import register_converters as _register_converters


Load the California housing dataset:

In [2]:
# California housing dataset; later cells read its .data (features) and .target.
housing = fetch_california_housing()

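Scale the features (important for gradient descent, which converges slowly or not at all when features have very different ranges) and prepend a bias column of ones: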

In [3]:
# m = number of samples, n = number of features.
m, n = housing.data.shape

# Standardize every feature column.
stdsc = StandardScaler()
scaled_housing = stdsc.fit_transform(housing.data)

# Prepend a column of ones so the first coefficient acts as the intercept.
scaled_housing_plus_bias = np.hstack((np.ones((m, 1)), scaled_housing))

# Sanity check: the bias column averages 1, the scaled features average ~0.
scaled_housing_plus_bias.mean(axis=0)

array([ 1.00000000e+00,  6.60969987e-17,  5.50808322e-18,  6.60969987e-17,
       -1.06030602e-16, -1.10161664e-17,  3.44255201e-18, -1.07958431e-15,
       -8.52651283e-15])

Batch size and number of batches processed in one epoch:

In [4]:
# Samples per minibatch.
batchsize = 100
# Minibatches per epoch: m / batchsize rounded up, stored as a NumPy integer.
n_batches = np.ceil(m / batchsize).astype(int)
n_batches

207

Function that randomly selects a minibatch and returns its feature matrix X and target vector y:

In [25]:
def select_minibatch(epoch, batch_index, batch_size):
    """Draw a reproducible random minibatch from the scaled housing data.

    The seed is derived from ``(epoch, batch_index)``, so calling the function
    again with the same arguments returns the same rows. Row indices are drawn
    uniformly from ``0 .. m-1`` with replacement, so a batch may contain
    duplicate rows.

    A private ``RandomState`` is used instead of ``np.random.seed`` so that
    the process-wide NumPy random state is not modified as a side effect; the
    drawn indices are the same as those of the global generator after it has
    been seeded with the same value.

    Parameters
    ----------
    epoch : int
        Index of the current epoch.
    batch_index : int
        Index of the batch within the epoch.
    batch_size : int
        Number of rows to draw.

    Returns
    -------
    X : ndarray
        The selected rows of ``scaled_housing_plus_bias``.
    y : ndarray
        The entries of ``housing.target`` for the same rows.
    """
    # One distinct seed per (epoch, batch) pair.
    seed = epoch * n_batches + batch_index
    rng = np.random.RandomState(seed)
    indices = rng.randint(m, size=batch_size)
    X = scaled_housing_plus_bias[indices]
    y = housing.target[indices]
    return X, y

# Smoke test: draw a tiny batch and check the shape of its feature matrix.
# Keyword arguments make the role of each value explicit, and the results get
# their own names so they are not confused with the X / y placeholders that
# the graph-construction cell defines.
X_demo, y_demo = select_minibatch(epoch=1, batch_index=0, batch_size=5)
X_demo.shape

(5, 9)

Computational graph for one gradient descent step:

In [26]:
# Inputs: placeholders fed at run time. The leading None leaves the number of
# rows (the batch size) unspecified.
X = tf.placeholder(dtype=tf.float32, shape=(None, n + 1))
y = tf.placeholder(dtype=tf.float32, shape=(None, 1))

# Parameters: one coefficient per column of X (bias included), drawn
# uniformly from [-1, 1) with a fixed op-level seed.
theta = tf.Variable(
    tf.random_uniform((n + 1, 1), -1.0, 1.0, seed=42),
    name='Theta',
)

# Loss: mean squared error between the targets and the linear prediction.
error = y - tf.matmul(X, theta)
mse = tf.reduce_mean(tf.square(error), name='mse')

# Training step: a plain gradient descent optimizer, and the op it returns for
# minimizing mse.
lerr = 0.01  # learning rate
optimizer = tf.train.GradientDescentOptimizer(learning_rate=lerr)
training_op = optimizer.minimize(mse)


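Data types: `X` and `mse` are tensors, `theta` is a variable, and `training_op` is an operation (`ops.Operation`, i.e. an operation node of the graph). `optimizer` is an ordinary Python object, not a graph node.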

In [27]:
# Python-side type of each handle created while building the graph.
tuple(type(obj) for obj in (X, mse, theta, training_op, optimizer))

(tensorflow.python.framework.ops.Tensor,
 tensorflow.python.framework.ops.Tensor,
 tensorflow.python.ops.variables.Variable,
 tensorflow.python.framework.ops.Operation,
 tensorflow.python.training.gradient_descent.GradientDescentOptimizer)

Running the gradient descent step repeatedly, updating the variable theta each time:

In [31]:
n_epochs = 10
init = tf.global_variables_initializer()

with tf.Session() as sess:
    # Run the initializer before the first training step.
    sess.run(init)

    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            Xb, yb = select_minibatch(epoch, batch_index, batchsize)
            # The y placeholder is declared as a column, so reshape the targets.
            sess.run(training_op, feed_dict={X: Xb, y: yb.reshape(-1, 1)})

    # Fetch the trained coefficients while the session is still open.
    best_theta = sess.run(theta)

Learned theta for the scaled data (the first entry is the bias term):

In [32]:
# Coefficients fetched at the end of training: row 0 multiplies the column of
# ones (intercept), the remaining rows multiply the scaled feature columns.
best_theta

array([[ 2.070016  ],
       [ 0.8204561 ],
       [ 0.1173173 ],
       [-0.22739051],
       [ 0.3113402 ],
       [ 0.00353193],
       [-0.01126994],
       [-0.91643935],
       [-0.8795008 ]], dtype=float32)