# Tensorflow Experiments

In [1]:
import tensorflow as tf

In [2]:
x = tf.Variable(3, name = 'x')
y = tf.Variable(4, name = 'y')

f = x*x*y + y + 2

In [3]:
sess = tf.Session()
sess.run(x.initializer)
sess.run(y.initializer)
result = sess.run(f)
print(result)
sess.close()

42


In [4]:
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()

print(result)

42


In [5]:
init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()
    result = f.eval()

print(result)

42


## Linear Regression

In [6]:
import numpy as np
from sklearn.datasets import fetch_california_housing

In [7]:
housing = fetch_california_housing()
m, n = housing.data.shape
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

In [8]:
X = tf.constant(housing_data_plus_bias, dtype = tf.float32, name = 'X')
y = tf.constant(housing.target.reshape(-1, 1), dtype = tf.float32, name = 'y')

XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

with tf.Session():
    theta_value = theta.eval()

print(theta_value)

[[-3.7185181e+01]
 [ 4.3633747e-01]
 [ 9.3952334e-03]
 [-1.0711310e-01]
 [ 6.4479220e-01]
 [-4.0338000e-06]
 [-3.7813708e-03]
 [-4.2348403e-01]
 [-4.3721911e-01]]


## Gradient Descent

In [9]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaled_housing_data_plus_bias = scaler.fit_transform(housing_data_plus_bias)

In [10]:
%%time

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype = tf.float32, name = 'X')
y = tf.constant(housing.target.reshape(-1, 1), dtype = tf.float32, name = 'y')
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0), name = 'theta')
y_pred = tf.matmul(X, theta, name = 'predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name = 'mse')
gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print('Epoch', epoch, 'MSE =', mse.eval())
        sess.run(training_op)
    
    best_theta = theta.eval()

print(best_theta)

Epoch 0 MSE = 8.760842
Epoch 100 MSE = 4.8559346
Epoch 200 MSE = 4.8066597
Epoch 300 MSE = 4.805256
Epoch 400 MSE = 4.804861
Epoch 500 MSE = 4.804567
Epoch 600 MSE = 4.8043294
Epoch 700 MSE = 4.804134
Epoch 800 MSE = 4.8039775
Epoch 900 MSE = 4.8038497
[[ 0.4745822 ]
 [ 0.8019074 ]
 [ 0.11591133]
 [-0.20876332]
 [ 0.25681978]
 [-0.00517678]
 [-0.03849506]
 [-0.9466108 ]
 [-0.9138276 ]]
Wall time: 579 ms


### Using autodiff

In [11]:
%%time

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype = tf.float32, name = 'X')
y = tf.constant(housing.target.reshape(-1, 1), dtype = tf.float32, name = 'y')
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0), name = 'theta')
y_pred = tf.matmul(X, theta, name = 'predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name = 'mse')
# gradients = 2/m * tf.matmul(tf.transpose(X), error)
gradients = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print('Epoch', epoch, 'MSE =', mse.eval())
        sess.run(training_op)
    
    best_theta = theta.eval()

print(best_theta)

Epoch 0 MSE = 6.8065143
Epoch 100 MSE = 4.9876857
Epoch 200 MSE = 4.93258
Epoch 300 MSE = 4.9038143
Epoch 400 MSE = 4.882239
Epoch 500 MSE = 4.865625
Epoch 600 MSE = 4.8527393
Epoch 700 MSE = 4.842697
Epoch 800 MSE = 4.8348284
Epoch 900 MSE = 4.8286314
[[ 0.9508381 ]
 [ 0.9347919 ]
 [ 0.16410865]
 [-0.41628242]
 [ 0.40927026]
 [ 0.01063237]
 [-0.04540569]
 [-0.4704894 ]
 [-0.45106593]]
Wall time: 513 ms


### Using optimizer

In [12]:
%%time

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype = tf.float32, name = 'X')
y = tf.constant(housing.target.reshape(-1, 1), dtype = tf.float32, name = 'y')
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0), name = 'theta')
y_pred = tf.matmul(X, theta, name = 'predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name = 'mse')
# gradients = 2/m * tf.matmul(tf.transpose(X), error)
# gradients = tf.gradients(mse, [theta])[0]
# training_op = tf.assign(theta, theta - learning_rate * gradients)
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print('Epoch', epoch, 'MSE =', mse.eval())
        sess.run(training_op)
    
    best_theta = theta.eval()

print(best_theta)

Epoch 0 MSE = 7.795078
Epoch 100 MSE = 4.93557
Epoch 200 MSE = 4.8905287
Epoch 300 MSE = 4.866946
Epoch 400 MSE = 4.8498726
Epoch 500 MSE = 4.8374476
Epoch 600 MSE = 4.8283877
Epoch 700 MSE = 4.821775
Epoch 800 MSE = 4.8169374
Epoch 900 MSE = 4.813396
[[ 0.7661979 ]
 [ 0.8308949 ]
 [ 0.14908847]
 [-0.2116431 ]
 [ 0.23638244]
 [ 0.00651661]
 [-0.04191977]
 [-0.67754215]
 [-0.6456227 ]]
Wall time: 509 ms


### Mini-batch algorithm

In [13]:
%%time

n_epochs = 1000
learning_rate = 0.01
batch_size = 1000
n_batches = int(np.ceil(m / batch_size))

target = housing.target.reshape(-1, 1)

def fetch_batch(epoch, batch_index, batch_size):
    begin = batch_size * batch_index
    end = begin + batch_size
    return scaled_housing_data_plus_bias[begin:end], target[begin:end]


# X = tf.constant(scaled_housing_data_plus_bias, dtype = tf.float32, name = 'X')
# y = tf.constant(housing.target.reshape(-1, 1), dtype = tf.float32, name = 'y')
X = tf.placeholder(dtype = tf.float32, shape=(None, n + 1), name = 'X')
y = tf.placeholder(dtype = tf.float32, shape=(None, 1), name = 'y')
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0), name = 'theta')
y_pred = tf.matmul(X, theta, name = 'predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name = 'mse')
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print('Epoch', epoch, 'MSE =', mse.eval(feed_dict = {X: scaled_housing_data_plus_bias, y: target}))
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            sess.run(training_op, feed_dict = {X: X_batch, y: y_batch})
    
    best_theta = theta.eval()

print(best_theta)

Epoch 0 MSE = 10.142023
Epoch 100 MSE = 4.820488
Epoch 200 MSE = 4.8205338
Epoch 300 MSE = 4.820533
Epoch 400 MSE = 4.8205338
Epoch 500 MSE = 4.820532
Epoch 600 MSE = 4.820532
Epoch 700 MSE = 4.820532
Epoch 800 MSE = 4.820532
Epoch 900 MSE = 4.820532
[[-0.05229354]
 [ 0.8675569 ]
 [ 0.10342735]
 [-0.28371745]
 [ 0.3321549 ]
 [ 0.0084083 ]
 [-0.03587088]
 [-0.8639901 ]
 [-0.9650646 ]]
Wall time: 8.01 s
