# Softmax classifier

In [10]:
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data

# Load MNIST with one-hot encoded labels.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Inputs: a batch of 784-value images and their 10-way one-hot labels.
x = tf.placeholder("float", [None, 784])
y = tf.placeholder("float", [None, 10])

# A single linear layer; both parameters start at zero.
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

# Keep the raw scores (logits) separate from the softmax probabilities so the
# loss can be computed from the logits directly.
logits = tf.matmul(x, W) + b
activation = tf.nn.softmax(logits)

# Use the fused softmax + cross-entropy op instead of -sum(y * log(softmax)):
# the hand-written form yields NaN as soon as a probability underflows to 0.
# Keyword arguments are accepted by both old and new TensorFlow releases.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(cost)

init = tf.initialize_all_variables()

with tf.Session() as sess:
    sess.run(init)
    batch_size = 100
    training_epoch = 10
    # Number of full batches per epoch (constant across epochs).
    total_batch = int(mnist.train.num_examples / batch_size)

    for epoch in range(training_epoch):
        avg_cost = 0.

        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Fetch the loss in the same run as the update step: one forward
            # pass per batch instead of two. The logged value is the loss the
            # update was computed from.
            fetched = sess.run([optimizer, cost],
                               feed_dict={x: batch_xs, y: batch_ys})
            batch_cost = fetched[1]
            avg_cost += batch_cost / total_batch

        # Single-argument print(...) produces the same text on Python 2 and 3.
        print("Epoch: {:04d} cost= {:.9f}".format(epoch + 1, avg_cost))

    print("Optimiization Finished")

    # A prediction is correct when the most probable class matches the label.
    correct_prediction = tf.equal(tf.argmax(activation, 1), tf.argmax(y, 1))

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    test_accuracy = accuracy.eval({x: mnist.test.images, y: mnist.test.labels})
    print("Accuracy %s" % (test_accuracy,))

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
 Epoch: 0001 cost= 1.174406662
Epoch: 0002 cost= 0.662055905
Epoch: 0003 cost= 0.550479108
Epoch: 0004 cost= 0.496710520
Epoch: 0005 cost= 0.463672952
Epoch: 0006 cost= 0.440914370
Epoch: 0007 cost= 0.423860170
Epoch: 0008 cost= 0.410645333
Epoch: 0009 cost= 0.399856004
Epoch: 0010 cost= 0.390955159
Optimiization Finished
Accuracy 0.9032


# Neural Nets(NN) using ReLU

In [16]:
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data

# Parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100
display_step = 1  # print the average cost every `display_step` epochs

# Load MNIST with one-hot encoded labels.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Inputs: a batch of 784-value images and their 10-way one-hot labels.
X = tf.placeholder("float", [None, 784])
Y = tf.placeholder("float", [None, 10])

# Weights and biases for a 784 -> 256 -> 256 -> 10 network, all drawn from
# tf.random_normal with its default parameters.
W1 = tf.Variable(tf.random_normal([784, 256]))
W2 = tf.Variable(tf.random_normal([256, 256]))
W3 = tf.Variable(tf.random_normal([256, 10]))

B1 = tf.Variable(tf.random_normal([256]))
B2 = tf.Variable(tf.random_normal([256]))
B3 = tf.Variable(tf.random_normal([10]))

# Two ReLU hidden layers followed by a linear output layer (logits).
L1 = tf.nn.relu(tf.add(tf.matmul(X, W1), B1))
L2 = tf.nn.relu(tf.add(tf.matmul(L1, W2), B2))
hypothesis = tf.add(tf.matmul(L2, W3), B3)

# Keyword arguments: newer TensorFlow releases reject the positional form,
# while older releases accept both.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

init = tf.initialize_all_variables()

with tf.Session() as sess:
    sess.run(init)
    # Number of full batches per epoch (constant across epochs).
    total_batch = int(mnist.train.num_examples / batch_size)

    for epoch in range(training_epochs):
        avg_cost = 0.

        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Fetch the loss in the same run as the update step: one forward
            # pass per batch instead of two.
            fetched = sess.run([optimizer, cost],
                               feed_dict={X: batch_xs, Y: batch_ys})
            batch_cost = fetched[1]
            avg_cost += batch_cost / total_batch

        # Honor display_step (previously defined but never used).
        if (epoch + 1) % display_step == 0:
            # Single-argument print(...) gives the same text on Python 2 and 3.
            print("Epoch: {:04d} cost= {:.9f}".format(epoch + 1, avg_cost))

    print("Optimiization Finished")

    # A prediction is correct when the highest-scoring class matches the label.
    correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    test_accuracy = accuracy.eval({X: mnist.test.images, Y: mnist.test.labels})
    print("Accuracy %s" % (test_accuracy,))

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
 Epoch: 0001 cost= 182.111037977
Epoch: 0002 cost= 40.857856352
Epoch: 0003 cost= 25.247431128
Epoch: 0004 cost= 17.352086008
Epoch: 0005 cost= 12.565217784
Epoch: 0006 cost= 9.254900367
Epoch: 0007 cost= 6.848520670
Epoch: 0008 cost= 4.977609372
Epoch: 0009 cost= 3.570799279
Epoch: 0010 cost= 2.527768218
Epoch: 0011 cost= 1.800466504
Epoch: 0012 cost= 1.295850018
Epoch: 0013 cost= 0.916595034
Epoch: 0014 cost= 0.588558199
Epoch: 0015 cost= 0.442225920
Optimiization Finished
Accuracy 0.9467


# Using Xavier initialization

In [19]:
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data

def xavier_init(n_inputs, n_outputs, uniform=True):
    """Build a Xavier-style weight initializer for one layer.

    Args:
        n_inputs: number of input units feeding the layer.
        n_outputs: number of output units of the layer.
        uniform: when True, return a uniform initializer over
            [-r, r] with r = sqrt(6 / (n_inputs + n_outputs)); when False,
            return a truncated-normal initializer with
            stddev = sqrt(3 / (n_inputs + n_outputs)).

    Returns:
        A TensorFlow initializer suitable for `tf.get_variable(initializer=...)`.
    """
    if uniform:
        init_range = tf.sqrt(6.0 / (n_inputs + n_outputs))
        return tf.random_uniform_initializer(-init_range, init_range)

    # The value must be bound to the same name that is passed below, and the
    # initializer is spelled `truncated_normal_initializer`.
    stddev = tf.sqrt(3.0 / (n_inputs + n_outputs))
    return tf.truncated_normal_initializer(stddev=stddev)

# Parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100
display_step = 1  # print the average cost every `display_step` epochs

# Load MNIST with one-hot encoded labels.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# tf.get_variable registers variables by name in the default graph, so
# running this cell twice (or after another cell that also creates "W1")
# would fail with "Variable W1 already exists". Start from a clean graph.
tf.reset_default_graph()

# Inputs: a batch of 784-value images and their 10-way one-hot labels.
X = tf.placeholder("float", [None, 784])
Y = tf.placeholder("float", [None, 10])

# Weights for a 784 -> 256 -> 256 -> 10 network, Xavier-initialized.
W1 = tf.get_variable("W1", shape=[784, 256], initializer=xavier_init(784, 256))
W2 = tf.get_variable("W2", shape=[256, 256], initializer=xavier_init(256, 256))
W3 = tf.get_variable("W3", shape=[256, 10], initializer=xavier_init(256, 10))

B1 = tf.Variable(tf.random_normal([256]))
B2 = tf.Variable(tf.random_normal([256]))
B3 = tf.Variable(tf.random_normal([10]))

# Two ReLU hidden layers followed by a linear output layer (logits).
L1 = tf.nn.relu(tf.add(tf.matmul(X, W1), B1))
L2 = tf.nn.relu(tf.add(tf.matmul(L1, W2), B2))
hypothesis = tf.add(tf.matmul(L2, W3), B3)

# Keyword arguments: newer TensorFlow releases reject the positional form,
# while older releases accept both.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

init = tf.initialize_all_variables()

with tf.Session() as sess:
    sess.run(init)
    # Number of full batches per epoch (constant across epochs).
    total_batch = int(mnist.train.num_examples / batch_size)

    for epoch in range(training_epochs):
        avg_cost = 0.

        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Fetch the loss in the same run as the update step: one forward
            # pass per batch instead of two.
            fetched = sess.run([optimizer, cost],
                               feed_dict={X: batch_xs, Y: batch_ys})
            batch_cost = fetched[1]
            avg_cost += batch_cost / total_batch

        # Honor display_step (previously defined but never used).
        if (epoch + 1) % display_step == 0:
            # Single-argument print(...) gives the same text on Python 2 and 3.
            print("Epoch: {:04d} cost= {:.9f}".format(epoch + 1, avg_cost))

    print("Optimiization Finished")

    # A prediction is correct when the highest-scoring class matches the label.
    correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    test_accuracy = accuracy.eval({X: mnist.test.images, Y: mnist.test.labels})
    print("Accuracy %s" % (test_accuracy,))

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Epoch: 0001 cost= 0.300557674
Epoch: 0002 cost= 0.107991545
Epoch: 0003 cost= 0.066779092
Epoch: 0004 cost= 0.045891128
Epoch: 0005 cost= 0.032078192
Epoch: 0006 cost= 0.022000140
Epoch: 0007 cost= 0.017471402
Epoch: 0008 cost= 0.013737831
Epoch: 0009 cost= 0.009422573
Epoch: 0010 cost= 0.006994594
Epoch: 0011 cost= 0.006382749
Epoch: 0012 cost= 0.005240644
Epoch: 0013 cost= 0.004859777
Epoch: 0014 cost= 0.004358721
Epoch: 0015 cost= 0.002943821
Optimiization Finished
Accuracy 0.9763


# Deeper network with dropout

In [1]:
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data

def xavier_init(n_inputs, n_outputs, uniform=True):
    """Build a Xavier-style weight initializer for one layer.

    Args:
        n_inputs: number of input units feeding the layer.
        n_outputs: number of output units of the layer.
        uniform: when True, return a uniform initializer over
            [-r, r] with r = sqrt(6 / (n_inputs + n_outputs)); when False,
            return a truncated-normal initializer with
            stddev = sqrt(3 / (n_inputs + n_outputs)).

    Returns:
        A TensorFlow initializer suitable for `tf.get_variable(initializer=...)`.
    """
    if uniform:
        init_range = tf.sqrt(6.0 / (n_inputs + n_outputs))
        return tf.random_uniform_initializer(-init_range, init_range)

    # The value must be bound to the same name that is passed below, and the
    # initializer is spelled `truncated_normal_initializer`.
    stddev = tf.sqrt(3.0 / (n_inputs + n_outputs))
    return tf.truncated_normal_initializer(stddev=stddev)

# Parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100
display_step = 1  # print the average cost every `display_step` epochs

# Load MNIST with one-hot encoded labels.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# tf.get_variable registers variables by name in the default graph, so
# running this cell twice (or after another cell that also creates "W1")
# would fail with "Variable W1 already exists". Start from a clean graph.
tf.reset_default_graph()

# Inputs: a batch of 784-value images and their 10-way one-hot labels.
X = tf.placeholder("float", [None, 784])
Y = tf.placeholder("float", [None, 10])

# Weights for a 784 -> 512 -> 512 -> 256 -> 256 -> 10 network, Xavier-initialized.
W1 = tf.get_variable("W1", shape=[784, 512], initializer=xavier_init(784, 512))
W2 = tf.get_variable("W2", shape=[512, 512], initializer=xavier_init(512, 512))
W3 = tf.get_variable("W3", shape=[512, 256], initializer=xavier_init(512, 256))
W4 = tf.get_variable("W4", shape=[256, 256], initializer=xavier_init(256, 256))
W5 = tf.get_variable("W5", shape=[256, 10], initializer=xavier_init(256, 10))

B1 = tf.Variable(tf.random_normal([512]))
B2 = tf.Variable(tf.random_normal([512]))
B3 = tf.Variable(tf.random_normal([256]))
B4 = tf.Variable(tf.random_normal([256]))
B5 = tf.Variable(tf.random_normal([10]))

# NOTE: despite its name, this value is passed as tf.nn.dropout's second
# positional argument, which in the TensorFlow releases this notebook targets
# is the probability of KEEPING a unit. Feeding 0.7 keeps ~70% of activations;
# feeding 1.0 disables dropout.
dropout_rate = tf.placeholder("float")

# Four ReLU hidden layers, each followed by dropout.
_L1 = tf.nn.relu(tf.add(tf.matmul(X, W1), B1))
L1 = tf.nn.dropout(_L1, dropout_rate)
_L2 = tf.nn.relu(tf.add(tf.matmul(L1, W2), B2))
L2 = tf.nn.dropout(_L2, dropout_rate)
_L3 = tf.nn.relu(tf.add(tf.matmul(L2, W3), B3))
L3 = tf.nn.dropout(_L3, dropout_rate)
_L4 = tf.nn.relu(tf.add(tf.matmul(L3, W4), B4))
L4 = tf.nn.dropout(_L4, dropout_rate)

# Linear output layer (logits).
hypothesis = tf.add(tf.matmul(L4, W5), B5)

# Keyword arguments: newer TensorFlow releases reject the positional form,
# while older releases accept both.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

init = tf.initialize_all_variables()

with tf.Session() as sess:
    sess.run(init)
    # Number of full batches per epoch (constant across epochs).
    total_batch = int(mnist.train.num_examples / batch_size)

    for epoch in range(training_epochs):
        avg_cost = 0.

        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Fetch the loss in the same run as the update step. A separate
            # sess.run(cost) would draw a fresh dropout mask, so the logged
            # loss would not be the one the update was computed from.
            fetched = sess.run(
                [optimizer, cost],
                feed_dict={X: batch_xs, Y: batch_ys, dropout_rate: 0.7})
            batch_cost = fetched[1]
            avg_cost += batch_cost / total_batch

        # Honor display_step (previously defined but never used).
        if (epoch + 1) % display_step == 0:
            # Single-argument print(...) gives the same text on Python 2 and 3.
            print("Epoch: {:04d} cost= {:.9f}".format(epoch + 1, avg_cost))

    print("Optimiization Finished")

    # A prediction is correct when the highest-scoring class matches the label.
    correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    # Evaluate with dropout disabled (keep probability 1.0).
    test_accuracy = accuracy.eval(
        {X: mnist.test.images, Y: mnist.test.labels, dropout_rate: 1.0})
    print("Accuracy %s" % (test_accuracy,))

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Epoch: 0001 cost= 0.486955918
Epoch: 0002 cost= 0.173713013
Epoch: 0003 cost= 0.129797729
Epoch: 0004 cost= 0.101075118
Epoch: 0005 cost= 0.086892024
Epoch: 0006 cost= 0.076938430
Epoch: 0007 cost= 0.065241157
Epoch: 0008 cost= 0.061767741
Epoch: 0009 cost= 0.057840851
Epoch: 0010 cost= 0.051317962
Epoch: 0011 cost= 0.049772431
Epoch: 0012 cost= 0.050085541
Epoch: 0013 cost= 0.044566258
Epoch: 0014 cost= 0.043411863
Epoch: 0015 cost= 0.039962754
Optimiization Finished
Accuracy 0.9829
