## Train and Monitor

In [2]:
import tensorflow as tf

### Utilities

In [3]:
def run_tf(x):
    """Evaluate ``x`` once in a fresh session and return the result.

    Opens a ``tf.Session`` with GPU memory growth enabled, runs the global
    variable initializer, starts the graph's queue runners, evaluates ``x``
    with a single ``sess.run`` call, then stops and joins the queue-runner
    threads.

    Args:
        x: The fetches to hand to ``sess.run`` (the notebook passes lists of
            tensors).

    Returns:
        Whatever ``sess.run(x)`` returns for the given fetches.

    Raises:
        Any exception raised by ``sess.run(x)``. The queue-runner threads are
        still asked to stop and are joined before the exception propagates.
    """
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # Variables are re-initialised on every call, so each evaluation
        # starts from the variables' initial values.
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            return sess.run(x)
        finally:
            # Stop and join the input threads even when the evaluation fails,
            # so a failed call does not leave background threads running.
            coord.request_stop()
            coord.join(threads)

### Get the data

In [35]:
# Queue-based input pipeline: read the CSV line by line (skipping the header
# row), parse each line into five columns, and assemble shuffled mini-batches.
filename_q = tf.train.string_input_producer(string_tensor=["50_Startups.csv"])
reader = tf.TextLineReader(skip_header_lines=1)
key, value = reader.read(queue=filename_q)

# Columns 1-3 and the target are parsed as floats. Column 4 is parsed as a
# string and is left out of the feature vector built below.
col1, col2, col3, col4, y_ = tf.decode_csv(
    records=value,
    record_defaults=[[1.], [1.], [1.], ["na"], [1.]],
)
x_ = tf.stack(values=[col1, col2, col3])

# Shuffle-queue sizing: the capacity leaves room for three batches on top of
# the minimum number of elements kept in the queue for mixing.
batch_size = 5
min_after_dequeue = 50
capacity = min_after_dequeue + 3 * batch_size

X_, Y_ = tf.train.shuffle_batch(
    tensors=[x_, y_],
    batch_size=batch_size,
    capacity=capacity,
    min_after_dequeue=min_after_dequeue,
)

# Give the targets an explicit trailing dimension: [batch_size, 1].
Y_ = tf.reshape(tensor=Y_, shape=[batch_size, 1])

In [5]:
# Pull one batch of features and targets through the input pipeline and display it.
o = run_tf([X_,Y_])
o

[array([[ 86419.7 , 153514.11,      0.  ]], dtype=float32),
 array([[122776.86]], dtype=float32)]

### Perceptron 1-layer

In [6]:
# Weight matrix of the single-layer model: shape [3, 1], every entry initialised to 0.1.
W = tf.Variable(tf.constant(0.1,shape=[3,1],dtype=tf.float32),name="W")

In [7]:
# Forward pass: linear projection of the input batch, then a ReLU on the result.
H = tf.matmul(X_,W)
Y = tf.nn.relu(H)

**Mean Squared Error**

In [8]:
# Loss: mean squared error between the batch targets Y_ and the model output Y.
mse = tf.losses.mean_squared_error(Y_,Y)

**Train**

In [21]:
# Plain gradient-descent optimizer with a learning rate of 1.0.
# NOTE(review): despite the name, `train_op` holds the optimizer object
# (compute_gradients / apply_gradients are called on it below), not a training op.
train_op = tf.train.GradientDescentOptimizer(1.)

**Getting the gradient values**

*train_op.compute_gradients* returns a list of **(gradient, variable)** pairs, one for every trainable variable in the graph. If no gradient can be computed for a variable, the first element of its pair is None.

In [22]:
# `dummy` is a variable that the loss does not depend on; it is here to show
# that compute_gradients reports a None gradient for such a variable.
dummy = tf.Variable(0.,name="dummy")
# List of (gradient, variable) pairs for the loss `mse`.
gradients = train_op.compute_gradients(mse)

In [11]:
# Display the (gradient, variable) pairs returned by compute_gradients.
gradients

[(<tf.Tensor 'gradients/MatMul_grad/tuple/control_dependency_1:0' shape=(3, 1) dtype=float32>,
  <tf.Variable 'W:0' shape=(3, 1) dtype=float32_ref>),
 (None, <tf.Variable 'dummy:0' shape=() dtype=float32_ref>)]

In [13]:
# Keep only the [gradient, variable] pairs whose gradient actually exists,
# dropping every entry where compute_gradients returned None.
grad_to_compute = [
    [grad, var]
    for grad, var in gradients
    if grad is not None
]
grad_to_compute

[[<tf.Tensor 'gradients/MatMul_grad/tuple/control_dependency_1:0' shape=(3, 1) dtype=float32>,
  <tf.Variable 'W:0' shape=(3, 1) dtype=float32_ref>]]

In [16]:
# Evaluate the gradient tensors together with the current variable values in a fresh session.
run_tf(grad_to_compute)

[[array([[-2.4960459e+10],
         [-2.7878351e+10],
         [-6.2043181e+10]], dtype=float32), array([[0.1],
         [0.1],
         [0.1]], dtype=float32)]]

**Apply gradient update**

In [23]:
# Op that applies the computed gradients to their variables; each run performs one optimizer step.
apply_gradient_op = train_op.apply_gradients(gradients)

In [28]:
# Train the 1-layer model: run the update op for 10000 steps and print the
# batch MSE every 1000 steps.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    try:
        for i in range(10000):
            # One gradient-descent step; the loss is fetched in the same call.
            _, loss = sess.run([apply_gradient_op, mse])
            if i % 1000 == 0:
                print(i, "MSE : ", loss)
    finally:
        # Always stop and join the input threads, including when training
        # fails or the cell is interrupted part-way through.
        coord.request_stop()
        coord.join(threads)

0 MSE :  3685812200.0
1000 MSE :  8090847700.0
2000 MSE :  23168422000.0
3000 MSE :  15717729000.0
4000 MSE :  11783545000.0
5000 MSE :  12390589000.0
6000 MSE :  18038462000.0
7000 MSE :  24374240000.0
8000 MSE :  9353366000.0
9000 MSE :  16878437000.0


### Perceptron 2-layer

In [45]:
# Two-layer perceptron: 3 inputs -> 3 hidden units -> 1 output, with a ReLU
# after each layer. The learning rate is a placeholder, so it is supplied
# through feed_dict when the training op is run.
lr = tf.placeholder(dtype=tf.float32)

# Hidden-layer parameters, drawn from a truncated normal distribution.
W1 = tf.Variable(
    initial_value=tf.truncated_normal(shape=[3, 3], dtype=tf.float32),
    name="W1",
)
B1 = tf.Variable(
    initial_value=tf.truncated_normal(shape=[3], dtype=tf.float32),
    name="B1",
)

# Output-layer parameters, initialised the same way.
W2 = tf.Variable(
    initial_value=tf.truncated_normal(shape=[3, 1], dtype=tf.float32),
    name="W2",
)
B2 = tf.Variable(
    initial_value=tf.truncated_normal(shape=[1], dtype=tf.float32),
    name="B2",
)

# Forward pass through both layers.
H1 = tf.nn.relu(features=tf.matmul(a=X_, b=W1) + B1)
Y = tf.nn.relu(features=tf.matmul(a=H1, b=W2) + B2)

# Loss, and the gradient-descent update built from explicitly computed gradients.
mse = tf.losses.mean_squared_error(labels=Y_, predictions=Y)

train_op = tf.train.GradientDescentOptimizer(learning_rate=lr)
gradients = train_op.compute_gradients(loss=mse)
apply_gradient_op = train_op.apply_gradients(grads_and_vars=gradients)

In [51]:
# Train the 2-layer model: run the update op for 20000 steps with a learning
# rate of 10.0 and print the batch MSE every 1000 steps.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    try:
        for i in range(20000):
            # W1 is fetched with every step so that `w1` still holds the
            # value from the final step once the loop has finished.
            _, loss, w1 = sess.run([apply_gradient_op, mse, W1], feed_dict={lr: 10.})
            if i % 1000 == 0:
                print(i, "MSE : ", "{:.2e}".format(loss))
    finally:
        # Always stop and join the input threads, including when training
        # fails or the cell is interrupted part-way through.
        coord.request_stop()
        coord.join(threads)

0 MSE :  2.11e+10
1000 MSE :  8.64e+09
2000 MSE :  1.60e+10
3000 MSE :  7.14e+09
4000 MSE :  1.37e+10
5000 MSE :  1.15e+10
6000 MSE :  9.45e+09
7000 MSE :  1.14e+10
8000 MSE :  9.84e+09
9000 MSE :  1.25e+10
10000 MSE :  1.43e+10
11000 MSE :  1.24e+10
12000 MSE :  9.89e+09
13000 MSE :  1.42e+10
14000 MSE :  1.13e+10
15000 MSE :  9.00e+09
16000 MSE :  1.14e+10
17000 MSE :  1.21e+10
18000 MSE :  1.20e+10
19000 MSE :  1.20e+10


In [52]:
# Inspect the W1 values fetched on the last training step.
w1

array([[-8.2221272e-04,  3.1148474e+11, -6.2223542e-01],
       [-7.3724103e-01,  3.5655336e+11, -7.2242993e-01],
       [-1.4682888e+00,  9.4279690e+11, -1.2156020e+00]], dtype=float32)