In [1]:
import tensorflow as tf

In [2]:
x = tf.Variable(3, name="x")
y = tf.Variable(4, name="y")

f = x*x*y + y + 2
# only creates a computation graph
# evaluating a tf graph can be done by opening a tensorflow
# session and using it to initialise the variable and evaluate f


In [4]:
# creating a sssion , initialising the variables
# ans close section

print(f)

sess = tf.Session()
sess.run(x.initializer)
sess.run(y.initializer)
result = sess.run(f)
print(result)
sess.close()

Tensor("add_1:0", shape=(), dtype=int32)
42


In [6]:
# another way to evaluate is
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()

# inside with block the session is set as default session.
# calling x.initializer.run() 


In [8]:
# isntead of manually running the initialiser everytime
# we can use tf.global_variables_initialiser


init = tf.global_variables_initializer() # prepare init node

with tf.Session() as sess:
    init.run() # actual initialisation
    result = f.eval()

print(result)

42


In [9]:
# in jupyter notebook we can also use interactive session
# as the default session

sess = tf.InteractiveSession()
init.run()
result = f.eval()
print(result)
sess.close()

42


In [10]:
# any node created is automatically added to default graph

x1 = tf.Variable(1)
x1.graph is tf.get_default_graph()


True

In [11]:
# we can also reate multiple independent graph

graph= tf.Graph()

with graph.as_default():
    x2 = tf.Variable(2)

x2.graph is graph

True

In [12]:
x2.graph is tf.get_default_graph()

False

In [15]:
# to reset thedefaultgraph
tf.reset_default_graph()

### Life cycle of a Node Value

In [17]:
# tensorflow automaticlly determines the set of nodes 
# that it dpends on and it evaluates these nodes first

w = tf.constant(3)
x =  w + 2
y = x + 5
z = x * 3

with tf.Session() as sess:
    print(y.eval()) # w and x are automatically computed
    print(z.eval())

10
15


In [18]:
# for efficent run if we want to evaluate y and z 
# without evaluating x and w twice
#we ask tf to evaluate bith y and z in just one graph

with tf.Session() as sess:
    y_val, z_val = sess.run([y,z])
    print(y_val)
    print(z_val)

# in single process tensorflow multiple sessions do not share 
# any state, even ifthe reuse the saegraph

10
15


### Linear Regression with Tensorflow

In [None]:
# tensorflow operation can take any number of inputs and produce
# variables and constants need no input they are called source ops
# the input anad outputs are multidimensional arrays called tensors
# Python api tenosrs are just numpy ndarrays
# linear regression on californiadataset

In [19]:
import numpy as np
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()
m,n = housing.data.shape

# we add an extra bias input feature (X0 =1) to all training instances
# this is done by numpy no tensorflow involved
housing_data_plus_bias = np.c_[np.ones((m,1)), housing.data]

In [22]:
# now creating two constant nodes X and y and theta
X = tf.constant(housing_data_plus_bias,dtype=tf.float32, name="X")

# we need to reshape y to calculate theta
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name="y")
XT = tf.transpose(X)

# thsi is the normal equation for linear regression
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT,X)),XT), y)

# session to evaluate theta
with tf.Session() as sess:
    theta_value = theta.eval()

# the main benefit is that tensoforflow would computethis directly on your primary


### gradient descent computation

In [23]:
# implementing gradient descent

# we can also use batch gradientdescent thatnt he normal equation
# it is importnat to first normalize te input feature vectors
# or training would be much slower
# we can do it by numpy, tensorfow , scikit learn StandardSCaler

from sklearn.preprocessing import StandardScaler

scaled_housing_data_plus_bias = StandardScaler().fit_transform(housing_data_plus_bias)

In [25]:
print(scaled_housing_data_plus_bias[:5][:5])

[[ 0.          2.34476576  0.98214266  0.62855945 -0.15375759 -0.9744286
  -0.04959654  1.05254828 -1.32783522]
 [ 0.          2.33223796 -0.60701891  0.32704136 -0.26333577  0.86143887
  -0.09251223  1.04318455 -1.32284391]
 [ 0.          1.7826994   1.85618152  1.15562047 -0.04901636 -0.82077735
  -0.02584253  1.03850269 -1.33282653]
 [ 0.          0.93296751  1.85618152  0.15696608 -0.04983292 -0.76602806
  -0.0503293   1.03850269 -1.33781784]
 [ 0.         -0.012881    1.85618152  0.3447108  -0.03290586 -0.75984669
  -0.08561576  1.03850269 -1.33781784]]


In [27]:
# steps for gradient descent
# 1. the random_uniform() function creates a node in the graph
# to generate a tensorcontaining random values, given its shape and value range
# like Numpy's rand() function

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name="y")

theta = tf.Variable(tf.random_uniform([n + 1,1], -1.0,1.0), name="theta")


In [28]:
# 2. the assign function creates a node 
# this will assign a new value to tf.variable

# predictions X . theta
y_pred = tf.matmul(X,theta, name="predictions")

error = y_pred -y
mse = tf.reduce_mean(tf.square(error),name="mse")
gradients = 2/m * tf.matmul(tf.transpose(X), error)

# next op calculated
training_op = tf.assign(theta,theta - learning_rate * gradients)



In [29]:
# The main loop executes the training sterp over and over and prints the
# current mean squared error (mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if (epoch % 100 == 0):
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
        
    best_theta = theta.eval()

Epoch 0 MSE = 9.467428
Epoch 100 MSE = 5.096341
Epoch 200 MSE = 4.9982862
Epoch 300 MSE = 4.9473033
Epoch 400 MSE = 4.9104877
Epoch 500 MSE = 4.8834043
Epoch 600 MSE = 4.8634033
Epoch 700 MSE = 4.8485866
Epoch 800 MSE = 4.837575
Epoch 900 MSE = 4.8293614


In [30]:
print(best_theta) # from gradients

[[-0.2949021 ]
 [ 0.86975557]
 [ 0.16850199]
 [-0.26238075]
 [ 0.2683621 ]
 [ 0.01307414]
 [-0.04439048]
 [-0.49952713]
 [-0.47103322]]


In [32]:
print(theta_value) # from normal equation

[[-3.68901253e+01]
 [ 4.36643779e-01]
 [ 9.45042260e-03]
 [-1.07117996e-01]
 [ 6.43712580e-01]
 [-3.96291580e-06]
 [-3.78801115e-03]
 [-4.20931637e-01]
 [-4.34006572e-01]]


### using autodiff

In [33]:
# mathematically deriving values of cost function
# is a big headache
# you could use symbolic differentiation to automatically find the equations
# for the partial derivatives for you
# but its not efficient

# for examplein this function
def my_func(a,b):
    z=0
    for i in range(100):
        z = a * np.cos(z + i) + z * np.sin(b - i)
    return z


In [34]:
# tensorflow autodiff can compute thte gradients 
gradients = tf.gradients(mse,[theta])[0]

# gradients function takes an op (in this case mse) and
# a list of varibles (in this case just theta)
# and creates a list of ops (one per variable)
# and computes gradients of the op with regards to
# each variable so gradients node will compute the
# gradient vector of the mse with regards to theta


In [36]:
# running the tf session once more with changing gradients

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch,"MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
print(best_theta)

# tensorflow uses reverse-mode autodiff 
# which is perfect when there are many inputs and fewer outputs


Epoch 0 MSE = 9.54605
Epoch 100 MSE = 4.924492
Epoch 200 MSE = 4.822067
Epoch 300 MSE = 4.8133245
Epoch 400 MSE = 4.810294
Epoch 500 MSE = 4.8083177
Epoch 600 MSE = 4.8069057
Epoch 700 MSE = 4.8058887
Epoch 800 MSE = 4.805155
Epoch 900 MSE = 4.8046255
[[ 0.06252575]
 [ 0.80939484]
 [ 0.12736683]
 [-0.20410803]
 [ 0.24470128]
 [-0.00110729]
 [-0.0396236 ]
 [-0.85606456]
 [-0.8232432 ]]


### Using an Optimizer


In [37]:
# tensoflow not only can compute gradients
# but it also provides a number of optimizers out of box
# gradient descent optimizer

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)


# if we want momentum optimizer
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)

### Ways to feed data into training algorithm

In [40]:
# modifying the previous code to implement Mini-batch Gradient Descent
# we need a way to replace X and y at every iteration
# and next mini batch
# we can use placeholder nodes
# they don't perform any computation
# they just outut the data you tellthem to output 
# at runtime
# to create a placeholder node we need to call placeholder() function
# and specify the output tensors data type
# if we specify None for dimension it means any shape

# creating a placeholder nde A 
# and a node B = A + 5
# when evaluating B we pass a feed_dict to eval()
# that specifies the value of A
# A must be of rank 2
# and there must be three columns ni it can have naynumber of rows

A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5
with tf.Session() as sess:
    B_val_1 = B.eval(feed_dict={A : [[1,2,3]]})
    B_val_2 = B.eval(feed_dict = {A: [[4,5,6],[7,8,9]]})

    print(B_val_1)
    print(B_val_2)

[[6. 7. 8.]]
[[ 9. 10. 11.]
 [12. 13. 14.]]


In [41]:
# implementing mini bathc gradient descent
# changin the definition of X and y in construction phase

X = tf.placeholder(tf.float32, shape=(None,n+1), name="X")
y = tf.placeholder(tf.float32, shape=(None,1), name="y")

# defining batch size and computing the total number of batches

batch_size = 100
n_batches = int(np.ceil(m/batch_size))




In [45]:
# execution [hase fetching the mini batches one by one then providing the value of X and y 
# through feed dict

shuffled_indices = np.random.permutation(m)
housing_X_shuffled = housing_data_plus_bias[shuffled_indices]
housing_y_shuffled = housing.target[shuffled_indices]

def fetch_batch(epoch, batch_index, batch_size):
    # getting the data from disk
    X_batch = housing_X_shuffled[epoch:epoch + batch_size]
    y_batch = housing_y_shuffled[epoch:epoch + batch_size].reshape(-1,1)
    
    return X_batch,y_batch

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            sess.run(training_op, feed_dict={X:X_batch, y: y_batch})
            
    best_theta - theta.eval()

print(best_theta)

[[ 0.06252575]
 [ 0.80939484]
 [ 0.12736683]
 [-0.20410803]
 [ 0.24470128]
 [-0.00110729]
 [-0.0396236 ]
 [-0.85606456]
 [-0.8232432 ]]


In [None]:
# evaluate the code given in notebook