## Wide Neural Network

In [4]:
import tensorflow as tf

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Start from an empty default graph so this cell can be re-run in the same
# kernel without piling up duplicate variables and ops.
tf.reset_default_graph()

# Hyperparameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100
display_step = 1  # print the average cost every `display_step` epochs

x = tf.placeholder(tf.float32, [None, 784]) # MNIST data image of shape 28 * 28 = 784
y = tf.placeholder(tf.float32, [None, 10]) # 0-9 digits recognition => 10 classes

# Weights and biases are drawn from a unit normal distribution
# (784 -> 256 -> 256 -> 10).
W1 = tf.Variable(tf.random_normal([784, 256]))
W2 = tf.Variable(tf.random_normal([256, 256]))
W3 = tf.Variable(tf.random_normal([256, 10]))

b1 = tf.Variable(tf.random_normal([256]))
b2 = tf.Variable(tf.random_normal([256]))
b3 = tf.Variable(tf.random_normal([10]))

# Our hypothesis: two ReLU hidden layers followed by a linear layer whose
# outputs are the (unnormalised) class logits.
L1 = tf.nn.relu(tf.add(tf.matmul(x, W1), b1))
L2 = tf.nn.relu(tf.add(tf.matmul(L1, W2), b2))
hypothesis = tf.add(tf.matmul(L2, W3), b3)

# Pass logits/labels by keyword: TensorFlow 1.x rejects positional arguments
# for this op, and keywords also rule out silently swapping the two.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

# Fraction of examples whose arg-max prediction matches the one-hot label.
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

init = tf.global_variables_initializer()

# Number of mini-batches per epoch (does not change between epochs).
total_batch = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(training_epochs):
        avg_cost = 0.
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)

            # Fetch the cost in the same run as the training step so the
            # graph is evaluated only once per batch.
            _, batch_cost = sess.run([optimizer, cost],
                                     feed_dict={x: batch_xs, y: batch_ys})
            avg_cost += batch_cost / total_batch

        if epoch % display_step == 0:
            print("Epoch:", "%04d" % (epoch+1), "cost=", "{:.9f}".format(avg_cost))

    print("Optimization Finished")

    print("Accuracy :", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
        

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Epoch: 0001 cost= 160.840851151
Epoch: 0002 cost= 40.710142645
Epoch: 0003 cost= 25.247591665
Epoch: 0004 cost= 17.450017703
Epoch: 0005 cost= 12.445894088
Epoch: 0006 cost= 9.229762022
Epoch: 0007 cost= 6.805903080
Epoch: 0008 cost= 5.057814985
Epoch: 0009 cost= 3.710952040
Epoch: 0010 cost= 2.739558045
Epoch: 0011 cost= 1.947978113
Epoch: 0012 cost= 1.378158916
Epoch: 0013 cost= 1.046368325
Epoch: 0014 cost= 0.729827283
Epoch: 0015 cost= 0.522628606
Optimization Finished
Accuracy : 0.9434


## Wide Neural Network with Xavier initialization

In [5]:
def xavier_init(n_inputs, n_outputs, uniform=True):
    """Build a Xavier (Glorot) weight initializer for one layer.

    Args:
        n_inputs: number of input units of the layer (fan-in).
        n_outputs: number of output units of the layer (fan-out).
        uniform: if True, return a uniform initializer on ``[-r, r]`` with
            ``r = sqrt(6 / (n_inputs + n_outputs))``; otherwise return a
            truncated-normal initializer with standard deviation
            ``sqrt(3 / (n_inputs + n_outputs))``.

    Returns:
        A TensorFlow initializer object, usable as the ``initializer``
        argument of ``tf.get_variable``.
    """
    # The scale depends only on Python numbers, so compute it with math.sqrt
    # rather than adding a tf.sqrt op to the graph for a constant.
    import math

    fan_sum = n_inputs + n_outputs
    if uniform:
        init_range = math.sqrt(6.0 / fan_sum)
        return tf.random_uniform_initializer(-init_range, init_range)

    stddev = math.sqrt(3.0 / fan_sum)
    return tf.truncated_normal_initializer(stddev=stddev)
    
import tensorflow as tf

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# tf.get_variable refuses to create a variable whose name already exists in
# the graph, so start from an empty default graph to keep this cell re-runnable.
tf.reset_default_graph()

# Hyperparameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100
display_step = 1  # print the average cost every `display_step` epochs

x = tf.placeholder(tf.float32, [None, 784]) # MNIST data image of shape 28 * 28 = 784
y = tf.placeholder(tf.float32, [None, 10]) # 0-9 digits recognition => 10 classes

# Weights use Xavier initialization sized to each layer (784 -> 256 -> 256 -> 10).
W1 = tf.get_variable("W1", shape=[784, 256], initializer=xavier_init(784, 256))
W2 = tf.get_variable("W2", shape=[256, 256], initializer=xavier_init(256, 256))
W3 = tf.get_variable("W3", shape=[256, 10], initializer=xavier_init(256, 10))

# Biases are still drawn from a unit normal distribution.
b1 = tf.Variable(tf.random_normal([256]))
b2 = tf.Variable(tf.random_normal([256]))
b3 = tf.Variable(tf.random_normal([10]))

# Our hypothesis: two ReLU hidden layers followed by a linear layer whose
# outputs are the (unnormalised) class logits.
L1 = tf.nn.relu(tf.add(tf.matmul(x, W1), b1))
L2 = tf.nn.relu(tf.add(tf.matmul(L1, W2), b2))
hypothesis = tf.add(tf.matmul(L2, W3), b3)

# Pass logits/labels by keyword: TensorFlow 1.x rejects positional arguments
# for this op, and keywords also rule out silently swapping the two.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

# Fraction of examples whose arg-max prediction matches the one-hot label.
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

init = tf.global_variables_initializer()

# Number of mini-batches per epoch (does not change between epochs).
total_batch = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(training_epochs):
        avg_cost = 0.
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)

            # Fetch the cost in the same run as the training step so the
            # graph is evaluated only once per batch.
            _, batch_cost = sess.run([optimizer, cost],
                                     feed_dict={x: batch_xs, y: batch_ys})
            avg_cost += batch_cost / total_batch

        if epoch % display_step == 0:
            print("Epoch:", "%04d" % (epoch+1), "cost=", "{:.9f}".format(avg_cost))

    print("Optimization Finished")

    print("Accuracy :", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
        

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Epoch: 0001 cost= 0.339885130
Epoch: 0002 cost= 0.110329526
Epoch: 0003 cost= 0.066120946
Epoch: 0004 cost= 0.045542726
Epoch: 0005 cost= 0.031215971
Epoch: 0006 cost= 0.021620217
Epoch: 0007 cost= 0.016331682
Epoch: 0008 cost= 0.011938562
Epoch: 0009 cost= 0.009188881
Epoch: 0010 cost= 0.007762998
Epoch: 0011 cost= 0.005782462
Epoch: 0012 cost= 0.005591247
Epoch: 0013 cost= 0.003899493
Epoch: 0014 cost= 0.003532865
Epoch: 0015 cost= 0.003765834
Optimization Finished
Accuracy : 0.9774


## Deep & Wide NN with Xavier initializer & dropout

깊은 네트워크에서는 오버피팅이 발생할 가능성이 높다. 그렇기 때문에 Dropout을 도입해보자.

In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST dataset from MNIST_data/ with one-hot encoded labels.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

def xavier_init(n_inputs, n_outputs, uniform=True):
    """Build a Xavier (Glorot) weight initializer for one layer.

    Args:
        n_inputs: number of input units of the layer (fan-in).
        n_outputs: number of output units of the layer (fan-out).
        uniform: if True, return a uniform initializer on ``[-r, r]`` with
            ``r = sqrt(6 / (n_inputs + n_outputs))``; otherwise return a
            truncated-normal initializer with standard deviation
            ``sqrt(3 / (n_inputs + n_outputs))``.

    Returns:
        A TensorFlow initializer object, usable as the ``initializer``
        argument of ``tf.get_variable``.
    """
    # The scale depends only on Python numbers, so compute it with math.sqrt
    # rather than adding a tf.sqrt op to the graph for a constant.
    import math

    fan_sum = n_inputs + n_outputs
    if uniform:
        init_range = math.sqrt(6.0 / fan_sum)
        return tf.random_uniform_initializer(-init_range, init_range)

    stddev = math.sqrt(3.0 / fan_sum)
    return tf.truncated_normal_initializer(stddev=stddev)

# tf.get_variable refuses to create a variable whose name already exists in
# the graph, so start from an empty default graph to keep this cell re-runnable.
tf.reset_default_graph()

# Hyperparameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100
display_step = 1  # print the average cost every `display_step` epochs

# NOTE: despite its name, this value is passed to tf.nn.dropout as the *keep*
# probability: 0.7 keeps 70% of the activations during training, and 1.0
# disables dropout for evaluation.
dropout_rate = tf.placeholder(tf.float32)

x = tf.placeholder(tf.float32, [None, 784]) # MNIST data image of shape 28 * 28 = 784
y = tf.placeholder(tf.float32, [None, 10]) # 0-9 digits recognition => 10 classes

# Weights use Xavier initialization sized to each layer
# (784 -> 512 -> 256 -> 128 -> 64 -> 10).
W1 = tf.get_variable("W1", shape=[784, 512], initializer=xavier_init(784, 512))
W2 = tf.get_variable("W2", shape=[512, 256], initializer=xavier_init(512, 256))
W3 = tf.get_variable("W3", shape=[256, 128], initializer=xavier_init(256, 128))
W4 = tf.get_variable("W4", shape=[128, 64], initializer=xavier_init(128, 64))
W5 = tf.get_variable("W5", shape=[64, 10], initializer=xavier_init(64, 10))

# Biases are drawn from a unit normal distribution.
b1 = tf.Variable(tf.random_normal([512]))
b2 = tf.Variable(tf.random_normal([256]))
b3 = tf.Variable(tf.random_normal([128]))
b4 = tf.Variable(tf.random_normal([64]))
b5 = tf.Variable(tf.random_normal([10]))

# Our hypothesis: four ReLU hidden layers, each followed by dropout, then a
# linear layer whose outputs are the (unnormalised) class logits.
_L1 = tf.nn.relu(tf.add(tf.matmul(x, W1), b1))
L1 = tf.nn.dropout(_L1, dropout_rate)
_L2 = tf.nn.relu(tf.add(tf.matmul(L1, W2), b2))
L2 = tf.nn.dropout(_L2, dropout_rate)
_L3 = tf.nn.relu(tf.add(tf.matmul(L2, W3), b3))
L3 = tf.nn.dropout(_L3, dropout_rate)
_L4 = tf.nn.relu(tf.add(tf.matmul(L3, W4), b4))
L4 = tf.nn.dropout(_L4, dropout_rate)

hypothesis = tf.add(tf.matmul(L4, W5), b5)

# Pass logits/labels by keyword: TensorFlow 1.x rejects positional arguments
# for this op, and keywords also rule out silently swapping the two.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

# Fraction of examples whose arg-max prediction matches the one-hot label.
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

init = tf.global_variables_initializer()

# Number of mini-batches per epoch (does not change between epochs).
total_batch = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(training_epochs):
        avg_cost = 0.
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)

            # Fetch the cost in the same run as the training step: a separate
            # run would evaluate the graph twice and sample a different
            # dropout mask than the one used for the update.
            _, batch_cost = sess.run([optimizer, cost],
                                     feed_dict={x: batch_xs, y: batch_ys,
                                                dropout_rate: 0.7})
            avg_cost += batch_cost / total_batch

        if epoch % display_step == 0:
            print("Epoch:", "%04d" % (epoch+1), "cost=", "{:.9f}".format(avg_cost))

    print("Optimization Finished")

    # Keep every activation (keep probability 1.0) when measuring test accuracy.
    print("Accuracy :", accuracy.eval({x: mnist.test.images,
                                      y: mnist.test.labels,
                                      dropout_rate: 1.0}))
        

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Epoch: 0001 cost= 0.644190522
Epoch: 0002 cost= 0.222056301
Epoch: 0003 cost= 0.162179115
Epoch: 0004 cost= 0.129569015
Epoch: 0005 cost= 0.110163264
Epoch: 0006 cost= 0.093362146
Epoch: 0007 cost= 0.083366156
Epoch: 0008 cost= 0.073904912
Epoch: 0009 cost= 0.071167606
Epoch: 0010 cost= 0.065242512
Epoch: 0011 cost= 0.056154895
Epoch: 0012 cost= 0.057020281
Epoch: 0013 cost= 0.048523744
Epoch: 0014 cost= 0.050183194
Epoch: 0015 cost= 0.046713409
Optimization Finished
Accuracy : 0.9823


## Optimizer

Stochastic Gradient Descent (SGD)  
Momentum  
NAG (Nesterov Accelerated Gradient)  
Adagrad  
Adadelta  
RMSProp  
Adam  

...

