## Train and Monitor

In [1]:
import tensorflow as tf

  return f(*args, **kwds)


### Utilities

In [2]:
def run_tf(x):
    """Evaluate `x` once in a fresh session and return the result.

    A new session is opened, all global variables are (re)initialised and the
    queue runners registered in the graph are started so that queue-based
    input pipelines can deliver data.

    Args:
        x: anything accepted by `Session.run` as fetches (a tensor, an op, or
           a nested list of them).

    Returns:
        Whatever `Session.run(x)` returns for the given fetches.

    Raises:
        Any exception raised by `Session.run` or re-raised by
        `Coordinator.join`; the queue-runner threads are stopped first.
    """
    config = tf.ConfigProto()
    # Allocate GPU memory on demand instead of reserving it all up front.
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        try:
            out = sess.run(x)
        finally:
            # Always stop and join the queue-runner threads, even when the
            # fetch fails; otherwise they are left running in the background.
            coord.request_stop()
            coord.join(threads)

        return out

### Get the data

In [3]:
# Queue-based CSV input pipeline: file-name queue -> line reader -> CSV decode -> shuffled batches.
filename_q = tf.train.string_input_producer(["50_Startups.csv"])
# skip_header_lines=1 drops the header row of the CSV file.
reader = tf.TextLineReader(skip_header_lines=1)
key, value = reader.read(filename_q)

# Five columns per record; the defaults make columns 1-3 and the last one floats
# and the fourth a string. col4 is decoded but not used in the cells shown here.
col1, col2, col3, col4, y_ = tf.decode_csv(value, record_defaults=[[1.], [1.], [1.], ["na"], [1.]])
# Feature vector of one record: the three numeric columns stacked together.
x_ = tf.stack([col1, col2, col3])

# Shuffle-queue sizing: keep at least min_after_dequeue records queued,
# with room for three extra batches on top of that.
min_after_dequeue = 50
batch_size = 5
capacity = min_after_dequeue + 3 * batch_size

X_, Y_ = tf.train.shuffle_batch( [x_, y_], batch_size=batch_size, capacity=capacity,
                                min_after_dequeue=min_after_dequeue)

# Give the targets an explicit [batch_size, 1] shape so they line up with the model output.
Y_ = tf.reshape(Y_,[batch_size,1])

In [4]:
# Pull one shuffled batch of features and targets to sanity-check the input pipeline.
o = run_tf([X_,Y_])
o

[array([[ 61136.38, 152701.92,  88218.23],
        [162597.7 , 151377.6 , 443898.53],
        [ 66051.52, 182645.56, 118148.2 ],
        [100671.96,  91790.61, 249744.55],
        [     0.  , 116983.8 ,  45173.06]], dtype=float32), array([[ 97483.56],
        [191792.06],
        [103282.38],
        [144259.4 ],
        [ 14681.4 ]], dtype=float32)]

### Perceptron 1-layer

In [6]:
# Weight matrix of the single-layer model: shape [3, 1], every entry initialised to 0.1.
W = tf.Variable(tf.constant(0.1,shape=[3,1],dtype=tf.float32),name="W")
# Register a histogram summary of W under the tag "P1/W".
tf.summary.histogram("P1/W",W)

<tf.Tensor 'P1/W:0' shape=() dtype=string>

In [7]:
# Project the input batch through W (no bias term in this model).
H = tf.matmul(X_,W)
# ReLU on the projection; Y is the model output that the loss compares with Y_.
Y = tf.nn.relu(H)
# Register a histogram summary of the model output under the tag "P1/Y".
tf.summary.histogram("P1/Y",Y)

<tf.Tensor 'P1/Y:0' shape=() dtype=string>

**Mean Squared Error**

In [8]:
# Mean squared error with Y_ as the labels and Y as the predictions.
mse = tf.losses.mean_squared_error(Y_,Y)
# Register a scalar summary of the loss under the tag "P1/MSE".
tf.summary.scalar("P1/MSE",mse)

<tf.Tensor 'MSE:0' shape=() dtype=string>

**Train**

In [9]:
# NOTE: despite its name, train_op holds the Adam optimizer object (learning rate 0.001),
# not a training op; the update op is built from it in a later cell.
train_op = tf.train.AdamOptimizer(0.001)

**Getting the gradient values**

*train_op.compute_gradients* returns a list of **[(gradient, variable), ...]** pairs for the variables in the graph (by default, the trainable ones). If no gradient is computed for a variable, the first element of its pair is None.

In [10]:
# dummy does not feed into mse. It is created before compute_gradients is called
# so that it appears in the result paired with a None gradient.
dummy = tf.Variable(0.,name="dummy")
gradients = train_op.compute_gradients(mse)


In [11]:
# Show the (gradient, variable) pairs returned by compute_gradients.
gradients

[(<tf.Tensor 'gradients/MatMul_grad/tuple/control_dependency_1:0' shape=(3, 1) dtype=float32>,
  <tf.Variable 'W:0' shape=(3, 1) dtype=float32_ref>),
 (None, <tf.Variable 'dummy:0' shape=() dtype=float32_ref>)]

In [12]:
## Keep only the [gradient, variable] pairs for which a gradient is actually computed
grad_to_compute = [[g,v] for g,v in gradients if g is not None]
# Log every gradient tensor on its own, tagged with the name of its variable.
# Passing the nested [gradient, variable] list straight to tf.summary.histogram
# would pack gradients and weights into one tensor and plot a mixture of both.
for g, v in grad_to_compute:
    tf.summary.histogram("P1/gradient/" + v.op.name, g)
grad_to_compute

[[<tf.Tensor 'gradients/MatMul_grad/tuple/control_dependency_1:0' shape=(3, 1) dtype=float32>,
  <tf.Variable 'W:0' shape=(3, 1) dtype=float32_ref>]]

In [28]:
# Evaluate each gradient together with the value of its variable.
# run_tf re-initialises the variables first, so these are the initial weights.
run_tf(grad_to_compute)

[[array([[-1.3153084e+10],
         [-1.8475014e+10],
         [-3.9256867e+10]], dtype=float32), array([[0.1],
         [0.1],
         [0.1]], dtype=float32)]]

**Apply gradient update**

In [13]:
# Build the update op from the (gradient, variable) pairs and tie it to the global step.
# The full `gradients` list is passed, including the (None, dummy) pair.
apply_gradient_op = train_op.apply_gradients(gradients,global_step=tf.train.get_or_create_global_step())

In [14]:
# Merge every summary registered so far into one op, so a single fetch yields all of them.
merged = tf.summary.merge_all()

In [15]:
# Train the 1-layer model for 10000 steps, writing summaries for TensorBoard
# at every step and printing the loss and predictions every 1000 steps.
config = tf.ConfigProto()
# Allocate GPU memory on demand instead of reserving it all up front.
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:

    writer = tf.summary.FileWriter("log/Train&Monitor/P1",sess.graph)
    try:
        sess.run(tf.global_variables_initializer())

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        try:
            for i in range(10000):
                # One optimisation step; loss, summaries and predictions are
                # fetched in the same run call so they refer to the same batch.
                _,loss,summary,logit = sess.run([apply_gradient_op,mse,merged,Y])
                writer.add_summary(summary,i)
                if(i%1000 == 0):
                    print(i, "MSE : ", loss,logit)
        finally:
            # Stop the input threads even if a training step fails.
            coord.request_stop()
            coord.join(threads)
    finally:
        # Flush pending events to disk and release the event file.
        writer.close()

0 MSE :  7694408000.0 [[53699.57 ]
 [59966.75 ]
 [20531.861]
 [27707.44 ]
 [75787.39 ]]
1000 MSE :  314319360.0 [[147937.2 ]
 [ 93686.09]
 [ 77239.71]
 [140559.19]
 [131304.84]]
2000 MSE :  231760420.0 [[ 18663.025]
 [131043.04 ]
 [200111.27 ]
 [119611.56 ]
 [ 82707.68 ]]
3000 MSE :  59319230.0 [[160114.84 ]
 [201665.06 ]
 [ 92989.11 ]
 [120800.016]
 [ 46185.047]]
4000 MSE :  205125230.0 [[ 91094.61]
 [127942.91]
 [118149.81]
 [132266.47]
 [111038.42]]
5000 MSE :  331307360.0 [[167889.36 ]
 [127693.45 ]
 [ 41613.215]
 [ 41613.215]
 [131574.2  ]]
6000 MSE :  189930860.0 [[138813.36 ]
 [155408.22 ]
 [ 91327.625]
 [158568.38 ]
 [118383.12 ]]
7000 MSE :  69596410.0 [[102008.98 ]
 [103418.84 ]
 [ 56313.844]
 [113269.38 ]
 [103418.84 ]]
8000 MSE :  211515120.0 [[ 41573.254]
 [ 91733.77 ]
 [106177.21 ]
 [202769.77 ]
 [ 77969.336]]
9000 MSE :  316496060.0 [[112570.42 ]
 [158910.42 ]
 [158910.42 ]
 [203483.28 ]
 [ 69054.625]]


### Perceptron 2-layer

In [4]:
# Two-layer perceptron on the same input batch (X_, Y_): 3 inputs -> 3 hidden units -> 1 output.
# The learning rate is a placeholder so it can be supplied at run time via feed_dict.
lr = tf.placeholder(tf.float32)

# Hidden layer parameters, drawn from a truncated normal distribution.
W1 = tf.Variable(tf.truncated_normal(shape=[3,3],dtype=tf.float32),name="W1")
tf.summary.histogram("P2/W1",W1)
B1 = tf.Variable(tf.truncated_normal(shape=[3],dtype=tf.float32),name="B1")
tf.summary.histogram("P2/B1",B1)

# Output layer parameters.
W2 = tf.Variable(tf.truncated_normal(shape=[3,1],dtype=tf.float32),name="W2")
tf.summary.histogram("P2/W2",W2)
B2 = tf.Variable(tf.truncated_normal(shape=[1],dtype=tf.float32),name="B2")
tf.summary.histogram("P2/B2",B2)

H1 = tf.nn.relu(tf.matmul(X_,W1)+B1)
tf.summary.histogram("P2/H1",H1)
# ReLU is applied to the output layer as well; the commented line below is the
# linear-output alternative.
Y = tf.nn.relu(tf.matmul(H1,W2)+B2)
tf.summary.histogram("P2/Y",Y)
#Y = tf.matmul(H1,W2)+B2


mse = tf.losses.mean_squared_error(Y_,Y)
tf.summary.scalar("P2/MSE",mse)

# The optimizer is split into compute_gradients / apply_gradients (instead of
# the one-step minimize below) so the gradients can be logged.
#train_op = tf.train.AdamOptimizer(lr).minimize(mse)
train_op = tf.train.AdamOptimizer(lr)
gradients = train_op.compute_gradients(mse)

grad_to_compute = [[g,v] for g,v in gradients if g is not None]
# Log every gradient tensor on its own, tagged with the name of its variable.
# Passing a [gradient, variable] pair as the histogram value would pack the
# gradient and the weights into one tensor and plot a mixture of both.
for g, v in grad_to_compute:
    tf.summary.histogram("P2/gradient/" + v.op.name, g)

apply_gradient_op = train_op.apply_gradients(gradients,global_step=tf.train.get_or_create_global_step())

In [5]:
# Train the 2-layer model for 20000 steps with a learning rate of 1e-3,
# writing summaries at every step and printing progress every 1000 steps.
merged = tf.summary.merge_all()

config = tf.ConfigProto()
# Allocate GPU memory on demand instead of reserving it all up front.
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:

    writer = tf.summary.FileWriter("log/Train&Monitor/P2",sess.graph)
    try:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        try:
            for i in range(20000):
                # w1 keeps the most recently fetched value of W1 so it can be
                # inspected after training.
                _,loss,w1,logit,summary = sess.run([apply_gradient_op,mse,W1,Y,merged],feed_dict={lr:1e-3})
                writer.add_summary(summary,i)
                if(i%1000 == 0):
                    print(i, "MSE : ", "{:.2e}".format(loss),logit)
        finally:
            # Stop the input threads even if a training step fails.
            coord.request_stop()
            coord.join(threads)
    finally:
        # Flush pending events to disk and release the event file.
        writer.close()

0 MSE :  1.08e+10 [[1.455065e-01]
 [1.455065e-01]
 [1.455065e-01]
 [1.455065e-01]
 [1.115814e+05]]
1000 MSE :  6.82e+08 [[ 16611.37]
 [110381.28]
 [ 70880.31]
 [132199.56]
 [122375.88]]
2000 MSE :  1.82e+08 [[200857.17 ]
 [ 71996.28 ]
 [201574.53 ]
 [ 44010.746]
 [ 41142.59 ]]
3000 MSE :  1.85e+08 [[131522.53 ]
 [139056.9  ]
 [ 41502.42 ]
 [112515.516]
 [203413.36 ]]
4000 MSE :  2.11e+08 [[116195.96 ]
 [ 17457.367]
 [176261.86 ]
 [ 77515.19 ]
 [167555.16 ]]
5000 MSE :  2.89e+08 [[112598.164]
 [102677.195]
 [ 64807.848]
 [177140.12 ]
 [ 41760.94 ]]
6000 MSE :  1.59e+08 [[ 41261.496]
 [160887.44 ]
 [ 67876.9  ]
 [102319.93 ]
 [102319.93 ]]
7000 MSE :  2.44e+08 [[ 91983.6 ]
 [176793.61]
 [ 69335.38]
 [ 51329.85]
 [161025.88]]
8000 MSE :  2.18e+08 [[176745.67 ]
 [117906.234]
 [ 63122.574]
 [ 78985.484]
 [ 77843.52 ]]
9000 MSE :  1.39e+08 [[129467.734]
 [201936.58 ]
 [ 91644.73 ]
 [131147.72 ]
 [106025.43 ]]
10000 MSE :  1.24e+08 [[ 78679.16 ]
 [122586.51 ]
 [101208.57 ]
 [ 69212.086]
 [101

In [6]:
# Show the value of W1 fetched by the last executed training step.
w1

array([[ 0.22525682,  0.79626054, -1.0879992 ],
       [ 1.1515378 ,  1.3971967 , -0.3985752 ],
       [-0.34762326, -0.27450502, -0.80606395]], dtype=float32)