# Linear regression with minibatch gradient descent and saving logs for Tensorboard

In [1]:
import numpy as np
import tensorflow as tf
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler

  from ._conv import register_converters as _register_converters


loading data:

In [2]:
# Fetch the California housing dataset through scikit-learn; the features are
# read from housing.data and the regression target from housing.target below.
housing = fetch_california_housing()

scaling data (important in case of gradient descent):

In [3]:
# m = number of samples, n = number of features.
m, n = housing.data.shape

# Standardize the features, then prepend a column of ones so that the bias
# term is learned as the first coefficient of theta.
stdsc = StandardScaler()
scaled_housing = stdsc.fit_transform(housing.data)
bias_column = np.ones([m, 1])
scaled_housing_plus_bias = np.c_[bias_column, scaled_housing]

# Sanity check: the bias column averages to 1, every scaled feature to ~0.
scaled_housing_plus_bias.mean(axis=0)

array([ 1.00000000e+00,  6.60969987e-17,  5.50808322e-18,  6.60969987e-17,
       -1.06030602e-16, -1.10161664e-17,  3.44255201e-18, -1.07958431e-15,
       -8.52651283e-15])

batchsize and number of batches to be processed in one epoch:

In [4]:
# Minibatch size and the number of minibatches that make up one epoch;
# rounding up means a partial final batch still counts as a batch.
batchsize = 100
batches_per_epoch = np.ceil(m / batchsize)
n_batches = batches_per_epoch.astype(int)
n_batches

207

Function for selection of a batch and parsing X, y:
- in contrast to experiment3_minibatch_gradient_descent.ipynb notebook we do not fix the random seed, in order to get different results in different runs.

In [5]:
def select_minibatch(epoch, batch_index, batch_size):
    """Draw a random minibatch from the scaled housing data.

    Row indices are drawn independently and uniformly from [0, m), so a row
    can appear more than once in a batch. No random seed is set here, which
    means successive calls and successive runs give different batches.

    Parameters
    ----------
    epoch, batch_index : int
        Currently unused; kept so that callers can keep passing the position
        of the batch within the training loop.
    batch_size : int
        Number of rows to draw.

    Returns
    -------
    (X, y) : tuple of numpy arrays
        The selected rows of ``scaled_housing_plus_bias`` and the matching
        entries of ``housing.target``.
    """
    row_ids = np.random.randint(m, size=batch_size)
    return scaled_housing_plus_bias[row_ids], housing.target[row_ids]

# Smoke test: draw 5 rows. select_minibatch does not use its first two
# arguments, so their values have no effect on the result.
X, y = select_minibatch(epoch=1, batch_index=batchsize, batch_size=5)
X.shape

(5, 9)

### Computational graph of one gradient descent step:
- in contrast to experiment3_minibatch_gradient_descent.ipynb notebook we do not set random seed=42 here, to get different results in different runs.

In [6]:
# Start from an empty default graph, so re-running this cell does not add
# a second copy of every node.
tf.reset_default_graph()

# Inputs: the design matrix (bias column + n features) and the target column.
X = tf.placeholder(tf.float32, shape=(None, n + 1), name='X')
y = tf.placeholder(tf.float32, shape=(None, 1), name='y')

# Coefficients theta, one per input column, initialised uniformly in [-1, 1).
# No random seed is set, so every run starts from a different theta.
initial_theta = tf.random_uniform((n + 1, 1), minval=-1.0, maxval=1.0)
theta = tf.Variable(initial_theta, name='Theta')

# Mean squared error of the linear prediction X @ theta.
predictions = tf.matmul(X, theta)
error = y - predictions
mse = tf.reduce_mean(tf.square(error), name='mse')

# Plain gradient descent with a fixed learning rate.
lerr = 0.01
optimizer = tf.train.GradientDescentOptimizer(learning_rate=lerr)
# One gradient-descent step on mse. No var_list is passed, so minimize()
# updates the trainable variables of the graph (here that is only theta).
training_op = optimizer.minimize(mse, name="Training_op")

# Saver for writing the trained variables to a checkpoint.
saver = tf.train.Saver()

# Op that initialises all variables; it must be run before training.
init = tf.global_variables_initializer()

### Adding nodes & objects to save logs for Tensorboard to graph:

In [7]:
from datetime import datetime

# Every run logs into its own sub-directory, named after the current UTC
# timestamp, so TensorBoard can show several runs side by side.
root_logdir = "linreg_logs"
now = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
logdir = "{}/run-{}/".format(root_logdir, now)

# Node that evaluates mse and serialises it as a TensorBoard-readable summary.
mse_summary = tf.summary.scalar('MSE', mse)
# Writer that stores summaries (and the graph definition passed here) in logdir.
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

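Data types of the objects defined above: `X`, `mse` and `mse_summary` are tensors, `theta` is a variable and `training_op` is an operation (a graph node), while `optimizer`, `saver` and `file_writer` are ordinary Python objects, not nodes of the graph.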

In [8]:
# Show the Python type of each object created so far, in definition order.
inspected = (X, mse, theta, training_op, optimizer, saver, mse_summary, file_writer)
tuple(type(obj) for obj in inspected)

(tensorflow.python.framework.ops.Tensor,
 tensorflow.python.framework.ops.Tensor,
 tensorflow.python.ops.variables.Variable,
 tensorflow.python.framework.ops.Operation,
 tensorflow.python.training.gradient_descent.GradientDescentOptimizer,
 tensorflow.python.training.saver.Saver,
 tensorflow.python.framework.ops.Tensor,
 tensorflow.python.summary.writer.writer.FileWriter)

Ops currently in the graph. Note that creating the training op with `optimizer.minimize()` and constructing the `Saver` added many nodes with names:
- gradients/*
- GradientDescent*
- save/*

Also note that there is no node named `error`: the computation of the error is split into `MatMul` and `sub` nodes.

Size of the computational graph (number of operations, i.e. the nodes of the graph):

In [9]:
# Keep a handle on the default graph (it is reused for the visualisation at
# the end of the notebook) and count the operations it contains.
gf = tf.get_default_graph()
graph_ops = gf.get_operations()
len(graph_ops)

70

Running the gradient descent step repeatedly, updating the variable theta each time.
- At every 10th batch of an epoch, the MSE of the current minibatch is written to the log file:

In [10]:
# Train theta with minibatch gradient descent, log the minibatch MSE for
# TensorBoard, and save the final model to a checkpoint.
# NOTE(review): file_writer is created in an earlier cell and closed here;
# a closed FileWriter presumably ignores further add_summary calls, so re-run
# the cell that creates file_writer before re-running this one — confirm.
with tf.Session() as sess:
    try:
        init.run()
        n_epochs = 10
        for epoch in range(n_epochs):
            for batch_index in range(n_batches):
                Xb, yb = select_minibatch(epoch, batch_index, batchsize)
                # The placeholder y expects a column, so reshape the 1-D targets.
                # The same feed is used for both logging and the training step.
                feed = {X: Xb, y: yb.reshape(-1, 1)}
                # Log the MSE on every 10th batch, before theta is updated.
                if batch_index % 10 == 0:
                    summary_str = mse_summary.eval(feed_dict=feed)
                    # Global step = number of batches processed so far.
                    step = epoch * n_batches + batch_index
                    file_writer.add_summary(summary_str, step)
                sess.run(training_op, feed_dict=feed)
        best_theta = theta.eval()
        saver.save(sess, "./saved_models/final_minibatch.ckpt")
    finally:
        # Always close the writer, even if training fails part-way: closing
        # flushes pending events so the results are visible in TensorBoard.
        file_writer.close()

optimal theta for the scaled data:

In [11]:
# Learned coefficients for the standardized features; the first entry
# belongs to the bias column of ones that was prepended to the data.
best_theta

array([[ 2.071829  ],
       [ 0.8068707 ],
       [ 0.12976527],
       [-0.21233962],
       [ 0.31577858],
       [ 0.00880455],
       [-0.04913743],
       [-0.8359768 ],
       [-0.7987448 ]], dtype=float32)

## Show Tensorboard graph directly in Jupyter notebook:
- using the module from https://github.com/ageron/handson-ml/blob/master/tensorflow_graph_in_jupyter.py, originally from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/deepdream/deepdream.ipynb

In [14]:
# show_graph comes from the helper module tensorflow_graph_in_jupyter.py.
# NOTE(review): presumably that file must sit next to this notebook (or
# otherwise be importable) for this cell to run — confirm.
from tensorflow_graph_in_jupyter import show_graph
# Render the graph captured in gf inline in the notebook.
show_graph(gf)