# Activation function 비교 실험

hidden layer의 activation function이 hypothesis의 형태를 어떻게 변화시키는지 확인하기 위한 예제.
어떤 과목 수강생의 학습 시간과 pass/fail 여부 데이터를 학습하여 학습 시간에 따른 pass/fail 예측 모형을 만든다.

activation function으로 ReLU, sigmoid, tanh를 사용하여 그 예측 성능을 비교하고
만들어진 모델의 hypothesis를 2차원으로 시각화하여 activation function에 따라 decision surface가 어떻게 형성되는지를 살펴보고자 한다.

In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# Toy training set: one (study hours, passed) pair per student, where
# passed is 1.0 for a pass and 0.0 for a fail.
samples = [
    (2., 0.),
    (3., 0.),
    (4., 0.),
    (5., 1.),
    (7., 0.),
    (8., 1.),
    (9., 1.),
]
data = np.asarray(samples, dtype=np.float32)

# Column 0 is the model input, column 1 is the training target.
x, y = data[:, 0], data[:, 1]

In [3]:
def xavier_init(n_inputs, n_outputs, uniform=True):
    """Return a Xavier-style initializer scaled by a layer's fan-in and fan-out.

    Args:
        n_inputs: Number of input units of the layer.
        n_outputs: Number of output units of the layer.
        uniform: When truthy, build a uniform initializer; otherwise build
            a truncated-normal one.

    Returns:
        ``tf.random_uniform_initializer`` over ``[-r, r]`` with
        ``r = sqrt(6 / (n_inputs + n_outputs))`` when ``uniform`` is truthy,
        else ``tf.truncated_normal_initializer`` with
        ``stddev = sqrt(3 / (n_inputs + n_outputs))``.
    """
    fan_total = n_inputs + n_outputs

    if not uniform:
        return tf.truncated_normal_initializer(
            stddev=tf.sqrt(3.0 / fan_total))

    limit = tf.sqrt(6.0 / fan_total)
    return tf.random_uniform_initializer(-limit, limit)

In [4]:
# Layer sizes of the network: 1 input -> 4 units -> 4 units -> 1 output logit.
l1_in = 1
l1_out = 4
l2_out = 4
l3_out = 1
# Bounds used only by the commented-out tf.random_uniform initialization
# kept below for reference.
init_min = -1.0
init_max = 1.0

# Build the graph. Samples are laid out along columns: each layer computes
# activation(W @ input + b) with W shaped [units_out, units_in].
with tf.name_scope('model'):
    # Shape-less placeholders: X receives the study hours, Y the 0/1 targets.
    X = tf.placeholder(dtype=tf.float32, name = 'input')
    Y = tf.placeholder(dtype=tf.float32, name = 'target')
    with tf.name_scope('hidden1'):
        # Turn the fed values into a single row so that W1 @ i1 yields one
        # column of hidden activations per sample.
        i1 = tf.reshape(X, [1,-1])
        W1 = tf.get_variable("w_l1", shape=[l1_out, l1_in], initializer = xavier_init(l1_in, l1_out))
        b1 = tf.get_variable("b_l1", shape=[l1_out, ], initializer = xavier_init(l1_out, l1_out))
        # Earlier initialization (stale: refers to l1_fs, which is not defined here).
        #W1 = tf.Variable(tf.random_uniform([l1_fs, 1], init_min, init_max), dtype=tf.float32, name = 'w_l1')
        #b1 = tf.Variable(tf.random_uniform([l1_fs,], init_min, init_max), dtype=tf.float32, name = 'b_l1')
        # Reshape the bias to a column so it broadcasts across the sample
        # columns. Note that b1 now names the reshaped tensor, not the variable.
        b1 = tf.reshape(b1, [l1_out, -1])
        # Activation under test: uncomment exactly one of the three lines
        # below to compare ReLU, sigmoid and tanh.
        #l1 = tf.nn.relu(tf.matmul(W1, i1) + b1, name = 'o_l1')
        #l1 = tf.nn.sigmoid(tf.matmul(W1, i1) + b1, name = 'o_l1')
        l1 = tf.nn.tanh(tf.matmul(W1, i1) + b1, name = 'o_l1')
    with tf.name_scope('hidden2'):
        W2 = tf.get_variable("w_l2", shape=[l2_out, l1_out], initializer = xavier_init(l1_out, l2_out))
        b2 = tf.get_variable("b_l2", shape=[l2_out, ], initializer = xavier_init(l2_out, l2_out))
        # Earlier initialization (stale: refers to l1_fs, which is not defined here).
        #W2 = tf.Variable(tf.random_uniform([1, l1_fs], init_min, init_max), dtype=tf.float32, name = 'w_l2')
        #b2 = tf.Variable(tf.random_uniform([1], init_min, init_max), dtype=tf.float32, name = 'b_l2')
        # Same column reshape as b1; b2 also names the reshaped tensor afterwards.
        b2 = tf.reshape(b2, [l2_out, -1])
        # Keep this activation choice in sync with the one used in hidden1.
        #l2 = tf.nn.relu(tf.matmul(W2, l1) + b2, name = 'o_l2')
        #l2 = tf.nn.sigmoid(tf.matmul(W2, l1) + b2, name = 'o_l2')
        l2 = tf.nn.tanh(tf.matmul(W2, l1) + b2, name = 'o_l2')
    with tf.name_scope('hidden3'):
        W3 = tf.get_variable("w_l3", shape=[l3_out, l2_out], initializer = xavier_init(l2_out, l3_out))
        b3 = tf.get_variable("b_l3", shape=[l3_out, ], initializer = xavier_init(l3_out, l3_out))
        # No activation here: l3 holds the raw logits that the loss consumes.
        l3 = tf.matmul(W3, l2) + b3
    with tf.name_scope('output'):
        # Sigmoid maps each logit into (0, 1); rounding it gives the 0/1
        # class prediction.
        output = tf.nn.sigmoid(l3)
        predict = tf.round(output)

In [5]:
with tf.name_scope('eval'):
    # Mean binary cross-entropy computed directly from the raw logits (l3),
    # not from the sigmoid output.
    # The arguments are passed by keyword on purpose: TensorFlow >= 1.0
    # rejects positional calls to this function, and the positional order of
    # logits and labels differs between releases.
    loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=l3, name='loss'))
    # Plain gradient descent on the mean loss.
    opt = tf.train.GradientDescentOptimizer(learning_rate=0.1)
    train = opt.minimize(loss)
    # Register the loss as a scalar summary so it can be written for TensorBoard.
    tf.summary.scalar('loss', loss)

In [6]:
# Delete the log_dir directory left over from earlier runs; the summary
# writer created below logs into a directory of the same name.
%rm -rf log_dir

In [7]:
# Single op that evaluates every summary registered in the graph.
summary_op = tf.summary.merge_all()

# Open the session and give all variables their initial values.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

# Event-file writer for "log_dir"; handing it the graph at construction
# records the graph definition in the log.
writer = tf.summary.FileWriter("log_dir", sess.graph)

In [8]:
# Full-batch training: every step feeds the whole data set, so the feed
# dict is built once outside the loop.
train_feed = {X: x, Y: y}

for step in range(50000):
    # One optimizer update; the summary and loss are fetched in the same run.
    summary_val, loss_val, _ = sess.run([summary_op, loss, train], feed_dict=train_feed)

    # Log the loss summary for every step; print progress every 1000 steps.
    writer.add_summary(summary_val, step)
    if step % 1000 == 0:
        print('step:', step, 'loss:', loss_val)

# Push any buffered summary events to disk before the log is inspected.
writer.flush()

# Dump the learned parameters of the two hidden layers.
w1_val, b1_val, w2_val, b2_val = sess.run([W1, b1, W2, b2])
print('w1', w1_val, 'b1', b1_val, 'w2', w2_val, 'b2', b2_val)

step: 0 loss: 0.703121
step: 1000 loss: 0.355473
step: 2000 loss: 0.329898
step: 3000 loss: 0.324326
step: 4000 loss: 0.309684
step: 5000 loss: 0.287422
step: 6000 loss: 0.280281
step: 7000 loss: 0.277539
step: 8000 loss: 0.276221
step: 9000 loss: 0.275466
step: 10000 loss: 0.274981
step: 11000 loss: 0.274643
step: 12000 loss: 0.274396
step: 13000 loss: 0.274206
step: 14000 loss: 0.274056
step: 15000 loss: 0.273935
step: 16000 loss: 0.273835
step: 17000 loss: 0.273751
step: 18000 loss: 0.27368
step: 19000 loss: 0.273618
step: 20000 loss: 0.273564
step: 21000 loss: 0.273517
step: 22000 loss: 0.273475
step: 23000 loss: 0.273438
step: 24000 loss: 0.273405
step: 25000 loss: 0.273375
step: 26000 loss: 0.273347
step: 27000 loss: 0.273322
step: 28000 loss: 0.2733
step: 29000 loss: 0.273279
step: 30000 loss: 0.27326
step: 31000 loss: 0.273242
step: 32000 loss: 0.273225
step: 33000 loss: 0.27321
step: 34000 loss: 0.273196
step: 35000 loss: 0.273182
step: 36000 loss: 0.27317
step: 37000 loss: 0.

In [9]:
# Query the trained network on a few study-hour values: first print the
# sigmoid outputs, then the rounded 0/1 predictions.
query_hours = [2.0, 3.0, 5.0, 7.0, 8.0]
for fetch in (output, predict):
    print(sess.run(fetch, feed_dict={X: query_hours}))

[[  1.44811143e-04   1.47187777e-04   9.99168634e-01   6.66969240e-01
    6.66842520e-01]]
[[ 0.  0.  1.  1.  1.]]


In [None]:
# Close the interactive session opened for training and prediction.
sess.close()

In [None]:
#!tensorboard --logdir=log_dir

In [None]:
# Reset the default graph so the model cell can be run again, e.g. with a
# different activation function.
tf.reset_default_graph()