# NN, ReLU, Xavier, Dropout, and Adam

[Good tensorflow examples](https://github.com/aymericdamien/TensorFlow-Examples)

# Neural Nets (NN) with ReLU for MNIST

In [11]:
import numpy as np
import tensorflow as tf

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Hyperparameters
learning_rate = 0.001   # step size handed to the Adam optimizer
training_epochs = 15    # number of full passes over the training set
batch_size = 100        # examples per mini-batch
display_step = 1        # the running cost is printed every `display_step` epochs


# tf Graph input
X = tf.placeholder("float", [None, 28*28])  # flattened MNIST images (28x28 pixels each)
Y = tf.placeholder("float", [None, 10])     # one-hot labels (loaded with one_hot=True)

# Weights and biases of a 784 -> 256 -> 256 -> 10 fully connected network.
# Every parameter is drawn from tf.random_normal with its default arguments.
W1 = tf.Variable(tf.random_normal([28*28, 256]))
W2 = tf.Variable(tf.random_normal([256, 256]))
W3 = tf.Variable(tf.random_normal([256, 10]))

B1 = tf.Variable(tf.random_normal([256]))
B2 = tf.Variable(tf.random_normal([256]))
B3 = tf.Variable(tf.random_normal([10]))


# Construct model: two ReLU hidden layers followed by a linear output layer.
L1 = tf.nn.relu(tf.add(tf.matmul(X, W1), B1))
L2 = tf.nn.relu(tf.add(tf.matmul(L1, W2), B2))
# `hypothesis` holds raw logits; the softmax is applied inside the loss op below.
hypothesis = tf.add(tf.matmul(L2, W3), B3)


# Define loss and optimizer.
# logits/labels are passed by keyword: the positional form is rejected by
# TensorFlow 1.x, and keywords remove any ambiguity about argument order.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)  # Adam optimizer.

# Op that initializes every variable created above (run once per session).
init = tf.initialize_all_variables()

# Launching the graph
with tf.Session() as sess:
    sess.run(init)

    # Number of mini-batches per epoch (floor division: whole batches only).
    # Loop-invariant, so it is computed once instead of once per epoch.
    total_batch = mnist.train.num_examples // batch_size

    # Training cycle.
    for epoch in range(training_epochs):
        avg_cost = 0.

        # Loop over all batches
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)

            # Run one optimizer step and fetch the batch loss in the same call,
            # rather than paying for a second forward pass just to read `cost`.
            batch_cost = sess.run([optimizer, cost],
                                  feed_dict={X: batch_xs, Y: batch_ys})[1]

            # Accumulate the mean loss over the epoch.
            avg_cost += batch_cost / total_batch

        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))

    print("Optimization finished.")

    # Test model: a prediction is correct when the arg-max of the logits
    # matches the arg-max of the one-hot label.
    correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))

    # Calculate accuracy as the mean of the 0/1 correctness indicators.
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval({X: mnist.test.images, Y: mnist.test.labels}))




Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Epoch: 0001 cost= 1377.976688232
Epoch: 0002 cost= 1476.485430797
Epoch: 0003 cost= 1422.412711958
Epoch: 0004 cost= 1365.746614879
Epoch: 0005 cost= 1331.046201838
Epoch: 0006 cost= 1312.775306729
Epoch: 0007 cost= 1293.274005682
Epoch: 0008 cost= 1274.772924583
Epoch: 0009 cost= 1273.432170743
Epoch: 0010 cost= 1265.496035156
Epoch: 0011 cost= 1276.551909846
Epoch: 0012 cost= 1262.088798606
Epoch: 0013 cost= 1279.224567205
Epoch: 0014 cost= 1285.556530762
Epoch: 0015 cost= 1293.972620961
Optimization finished.
Accuracy: 0.9445


# Neural Nets (NN) with ReLU and Xavier initializer for MNIST

In [1]:
import math
import numpy as np
import tensorflow as tf

def xavier_init(n_inputs, n_outputs, uniform=True):
  """Return a Xavier (Glorot) initializer sized for one dense layer.

  The spread of the initial weights is derived from fan-in plus fan-out, with
  the aim of keeping the gradient scale roughly equal across layers.

  Reference: Xavier Glorot and Yoshua Bengio (2010), "Understanding the
  difficulty of training deep feedforward neural networks", International
  Conference on Artificial Intelligence and Statistics.

  Args:
    n_inputs: Number of input nodes feeding each output.
    n_outputs: Number of output nodes fed by each input.
    uniform: Truthy selects a uniform distribution; falsy selects a truncated
      normal distribution.

  Returns:
    The object produced by `tf.random_uniform_initializer` or
    `tf.truncated_normal_initializer`.
  """
  fan_total = n_inputs + n_outputs

  if not uniform:
    # A numerator of 3 gives roughly the same limits as the uniform case,
    # because the truncated normal re-draws values lying more than two
    # standard deviations from the mean.
    return tf.truncated_normal_initializer(stddev=math.sqrt(3.0 / fan_total))

  # Uniform case: 6 is the constant used in the paper.
  limit = math.sqrt(6.0 / fan_total)
  return tf.random_uniform_initializer(-limit, limit)



# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Hyperparameters
learning_rate = 0.001   # step size handed to the Adam optimizer
training_epochs = 15    # number of full passes over the training set
batch_size = 100        # examples per mini-batch
display_step = 1        # the running cost is printed every `display_step` epochs


# tf Graph input
X = tf.placeholder("float", [None, 28*28])  # flattened MNIST images (28x28 pixels each)
Y = tf.placeholder("float", [None, 10])     # one-hot labels (loaded with one_hot=True)

# Weights use the Xavier initializer sized by each layer's fan-in/fan-out.
# tf.get_variable registers them by name, so this cell needs a fresh graph.
W1 = tf.get_variable("W1", shape=[784, 256], initializer=xavier_init(784, 256))
W2 = tf.get_variable("W2", shape=[256, 256], initializer=xavier_init(256, 256))
W3 = tf.get_variable("W3", shape=[256, 10], initializer=xavier_init(256, 10))

# Biases are still drawn from tf.random_normal with its default arguments.
B1 = tf.Variable(tf.random_normal([256]))
B2 = tf.Variable(tf.random_normal([256]))
B3 = tf.Variable(tf.random_normal([10]))


# Construct model: two ReLU hidden layers followed by a linear output layer.
L1 = tf.nn.relu(tf.add(tf.matmul(X, W1), B1))
L2 = tf.nn.relu(tf.add(tf.matmul(L1, W2), B2))
# `hypothesis` holds raw logits; the softmax is applied inside the loss op below.
hypothesis = tf.add(tf.matmul(L2, W3), B3)


# Define loss and optimizer.
# logits/labels are passed by keyword: the positional form is rejected by
# TensorFlow 1.x, and keywords remove any ambiguity about argument order.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)  # Adam optimizer.

# Op that initializes every variable created above (run once per session).
init = tf.initialize_all_variables()

# Launching the graph
with tf.Session() as sess:
    sess.run(init)

    # Number of mini-batches per epoch (floor division: whole batches only).
    # Loop-invariant, so it is computed once instead of once per epoch.
    total_batch = mnist.train.num_examples // batch_size

    # Training cycle.
    for epoch in range(training_epochs):
        avg_cost = 0.

        # Loop over all batches
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)

            # Run one optimizer step and fetch the batch loss in the same call,
            # rather than paying for a second forward pass just to read `cost`.
            batch_cost = sess.run([optimizer, cost],
                                  feed_dict={X: batch_xs, Y: batch_ys})[1]

            # Accumulate the mean loss over the epoch.
            avg_cost += batch_cost / total_batch

        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))

    print("Optimization finished.")

    # Test model: a prediction is correct when the arg-max of the logits
    # matches the arg-max of the one-hot label.
    correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))

    # Calculate accuracy as the mean of the 0/1 correctness indicators.
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval({X: mnist.test.images, Y: mnist.test.labels}))


Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Epoch: 0001 cost= 0.297062932
Epoch: 0002 cost= 0.103335683
Epoch: 0003 cost= 0.063473773
Epoch: 0004 cost= 0.042808121
Epoch: 0005 cost= 0.030750442
Epoch: 0006 cost= 0.022074451
Epoch: 0007 cost= 0.015681246
Epoch: 0008 cost= 0.012493750
Epoch: 0009 cost= 0.009708986
Epoch: 0010 cost= 0.007344132
Epoch: 0011 cost= 0.006632027
Epoch: 0012 cost= 0.004656308
Epoch: 0013 cost= 0.004655892
Epoch: 0014 cost= 0.003493683
Epoch: 0015 cost= 0.002986874
Optimization finished.
Accuracy: 0.9793


# Neural Nets (NN) with ReLU, Xavier initializer, dropout, and the Adam optimizer for MNIST data

In [1]:
import math
import numpy as np
import tensorflow as tf

def xavier_init(n_inputs, n_outputs, uniform=True):
  """Create a weight initializer following the Xavier/Glorot scheme.

  The scheme scales initial weights by the layer's combined fan-in and fan-out
  so that gradients stay at roughly the same scale in every layer. See
  Xavier Glorot and Yoshua Bengio (2010), "Understanding the difficulty of
  training deep feedforward neural networks", International Conference on
  Artificial Intelligence and Statistics.

  Args:
    n_inputs: The number of input nodes into each output.
    n_outputs: The number of output nodes for each input.
    uniform: If true, sample from a uniform distribution; otherwise sample
      from a truncated normal distribution.

  Returns:
    An initializer built by `tf.random_uniform_initializer` (uniform case) or
    `tf.truncated_normal_initializer` (normal case).
  """
  use_uniform = bool(uniform)

  # 6 is the paper's constant for the uniform range. 3 is used for the normal
  # case because the truncated normal re-picks values beyond two standard
  # deviations, which yields approximately the same limits.
  numerator = 6.0 if use_uniform else 3.0
  scale = math.sqrt(numerator / (n_inputs + n_outputs))

  if use_uniform:
    return tf.random_uniform_initializer(-scale, scale)
  return tf.truncated_normal_initializer(stddev=scale)



# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Hyperparameters
learning_rate = 0.001   # step size handed to the Adam optimizer
training_epochs = 15    # number of full passes over the training set
batch_size = 100        # examples per mini-batch
display_step = 1        # the running cost is printed every `display_step` epochs


# tf Graph input
X = tf.placeholder("float", [None, 28*28])  # flattened MNIST images (28x28 pixels each)
Y = tf.placeholder("float", [None, 10])     # one-hot labels (loaded with one_hot=True)

# Weights use the Xavier initializer sized by each layer's fan-in/fan-out.
# tf.get_variable registers them by name, so this cell needs a fresh graph.
W1 = tf.get_variable("W1", shape=[784, 256], initializer=xavier_init(784, 256))
W2 = tf.get_variable("W2", shape=[256, 256], initializer=xavier_init(256, 256))
W3 = tf.get_variable("W3", shape=[256, 10], initializer=xavier_init(256, 10))

# Biases are still drawn from tf.random_normal with its default arguments.
B1 = tf.Variable(tf.random_normal([256]))
B2 = tf.Variable(tf.random_normal([256]))
B3 = tf.Variable(tf.random_normal([10]))


# Construct model: each ReLU hidden layer is followed by dropout.
# NOTE: despite its name, `dropout_rate` is used as the *keep* probability.
# It is the second positional argument of tf.nn.dropout, and the training
# loop feeds 0.7 while evaluation feeds 1.0 (i.e. dropout disabled).
dropout_rate = tf.placeholder("float")
_L1 = tf.nn.relu(tf.add(tf.matmul(X, W1), B1))
L1 = tf.nn.dropout(_L1, dropout_rate)
_L2 = tf.nn.relu(tf.add(tf.matmul(L1, W2), B2))
L2 = tf.nn.dropout(_L2, dropout_rate)
# `hypothesis` holds raw logits; the softmax is applied inside the loss op below.
hypothesis = tf.add(tf.matmul(L2, W3), B3)


# Define loss and optimizer.
# logits/labels are passed by keyword: the positional form is rejected by
# TensorFlow 1.x, and keywords remove any ambiguity about argument order.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)  # Adam optimizer.

# Op that initializes every variable created above (run once per session).
init = tf.initialize_all_variables()

# Open a session, train with dropout, then report test accuracy.
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples/batch_size)

        # Iterate over every mini-batch of the epoch.
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)

            # Same batch, two feeds: 0.7 for the update step, 1.0 when the
            # loss is read back for logging.
            train_feed = {X: batch_xs, Y: batch_ys, dropout_rate: 0.7}
            eval_feed = {X: batch_xs, Y: batch_ys, dropout_rate: 1.0}

            # Apply one optimizer update on this batch.
            sess.run(optimizer, feed_dict=train_feed)

            # Add this batch's share of the epoch-average loss.
            batch_cost = sess.run(cost, feed_dict=eval_feed)
            avg_cost += batch_cost/total_batch

        # Report the averaged loss on every `display_step`-th epoch.
        if epoch % display_step == 0:
            epoch_text = '%04d' % (epoch + 1)
            cost_text = "{:.9f}".format(avg_cost)
            print("Epoch:", epoch_text, "cost=", cost_text)

    print("Optimization finished.")

    # A prediction counts as correct when the arg-max of the logits equals
    # the arg-max of the one-hot label.
    predicted_class = tf.argmax(hypothesis, 1)
    true_class = tf.argmax(Y, 1)
    correct_prediction = tf.equal(predicted_class, true_class)

    # Accuracy is the mean of the 0/1 correctness indicators; the test set
    # is fed with dropout_rate 1.0.
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    test_feed = {X: mnist.test.images, Y: mnist.test.labels, dropout_rate: 1.0}
    print("Accuracy:", accuracy.eval(test_feed))

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Epoch: 0001 cost= 0.329253233
Epoch: 0002 cost= 0.122324671
Epoch: 0003 cost= 0.083812579
Epoch: 0004 cost= 0.064499823
Epoch: 0005 cost= 0.051628341
Epoch: 0006 cost= 0.040803658
Epoch: 0007 cost= 0.035282162
Epoch: 0008 cost= 0.029324314
Epoch: 0009 cost= 0.025424560
Epoch: 0010 cost= 0.022480663
Epoch: 0011 cost= 0.019724995
Epoch: 0012 cost= 0.016837224
Epoch: 0013 cost= 0.014917968
Epoch: 0014 cost= 0.013256886
Epoch: 0015 cost= 0.011836904
Optimization finished.
Accuracy: 0.9833
