In [8]:
""" Neural Network.
A 2-Hidden Layers Fully Connected Neural Network (a.k.a Multilayer Perceptron)
implementation with TensorFlow. This example is using the MNIST database
of handwritten digits (http://yann.lecun.com/exdb/mnist/).
This example is using TensorFlow layers, see 'neural_network_raw' example for
a raw implementation with variables.
Links:
    [MNIST Dataset](http://yann.lecun.com/exdb/mnist/).
Author: Aymeric Damien
"""

from __future__ import print_function

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=False)

import tensorflow as tf

# Parameters
learning_rate = 0.5
num_steps = 2000
batch_size = 128
display_step = 100

# Network Parameters
n_hidden_1 = 256 # 1st layer number of neurons
n_hidden_2 = 256 # 2nd layer number of neurons
num_input = 784 # MNIST data input (img shape: 28*28)
num_classes = 10 # MNIST total classes (0-9 digits)


# Define the neural network
def neural_net(x_dict):
    # TF Estimator input is a dict, in case of multiple inputs
    x = x_dict['images']
    # Hidden fully connected layer with 256 neurons
    layer_1 = tf.layers.dense(x, n_hidden_1)
    # Hidden fully connected layer with 256 neurons
    layer_2 = tf.layers.dense(layer_1, n_hidden_2)
    # Output fully connected layer with a neuron for each class
    out_layer = tf.layers.dense(layer_2, num_classes)
    return out_layer


# Define the model function (following TF Estimator Template)
def model_fn_l2(features, labels, mode):
    # Build the neural network
    logits = neural_net(features)
    # Predictions
    pred_classes = tf.argmax(logits, axis=1)
    pred_probas = tf.nn.softmax(logits)
    
    # If prediction mode, early return
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)
    
#----------------------------------------------------------------------------------------------------------------------------
    # TODO: IMPLEMENT THIS FUNCTION
    # Define loss and optimizer
    # Compare the use of squared loss, cross entropy loss, and softmax with log-likelihood 
    #Squared loss
    #loss_op_l2 = tf.reduce_mean(tf.nn.l2_loss(pred_probas-tf.cast(labels, dtype=tf.float32)))
    #print(1- tf.multiply(tf.cast(tf.argmax(logits, axis=1), dtype=tf.float32),tf.cast(labels, dtype=tf.float32)))
    loss_op_l2 = tf.reduce_mean(tf.square(1 - tf.cast(tf.one_hot(labels,10),
    dtype = tf.float64)*tf.cast(tf.sigmoid(logits),dtype = tf.float64)))
                                                                                                          
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)

    train_op_l2 = optimizer.minimize(loss_op_l2, global_step=tf.train.get_global_step())
#----------------------------------------------------------------------------------------------------------------------------   
    # Initializing the variables
    init = tf.global_variables_initializer()

    # Evaluate the accuracy of the model
    acc_op = tf.metrics.accuracy(labels=labels, predictions=pred_classes)

    # TF Estimators requires to return a EstimatorSpec, that specify
    # the different ops for training, evaluating, ...
    estim_specs = tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=pred_classes,
        loss=loss_op_l2,
        train_op=train_op_l2,
        eval_metric_ops={'accuracy': acc_op})

    return estim_specs
#----------------------------------------------------------------------------------------------------------------------------
#For softmax with log likelihood
def model_fn_log(features, labels, mode):
    # Build the neural network
    logits = neural_net(features)
    
    # Predictions
    pred_classes = tf.argmax(logits, axis=1)
    pred_probas = tf.nn.softmax(logits)
    # If prediction mode, early return
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)

    # TODO: IMPLEMENT THIS FUNCTION
    # Define loss and optimizer
    # Compare the use of squared loss, cross entropy loss, and softmax with log-likelihood 
    #Squared loss
    loss_op_log = tf.reduce_mean(tf.losses.log_loss(labels = 
    tf.one_hot(labels,10), predictions =  tf.nn.softmax(logits)))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    
    train_op_log = optimizer.minimize(loss_op_log, global_step=tf.train.get_global_step())
    
    # Initializing the variables
    init = tf.global_variables_initializer()
    # Evaluate the accuracy of the model
    acc_op = tf.metrics.accuracy(labels=labels, predictions=pred_classes)

    # TF Estimators requires to return a EstimatorSpec, that specify
    # the different ops for training, evaluating, ...
    estim_specs = tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=pred_classes,
        loss=loss_op_log,
        train_op=train_op_log,
        eval_metric_ops={'accuracy': acc_op})

    return estim_specs
#----------------------------------------------------------------------------------------------------------------------------

def model_fn_cross(features, labels, mode):
    # Build the neural network
    logits = neural_net(features)

    # Predictions
    pred_classes = tf.argmax(logits, axis=1)
    pred_probas = tf.nn.softmax(logits)

    # If prediction mode, early return
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)
#----------------------------------------------------------------------------------------------------------------------------
    # TODO: IMPLEMENT THIS FUNCTION
    # Define loss and optimizer
    # Compare the use of squared loss, cross entropy loss, and softmax with log-likelihood 
    
    #cross entropy
    loss_op_cross = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=tf.cast(labels, dtype=tf.int32)))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    #train_op_log = optimizer.minimize(loss_op_log, global_step=tf.train.get_global_step())
    #train_op_l2 = optimizer.minimize(loss_op_l2, global_step=tf.train.get_global_step())
    train_op_cross = optimizer.minimize(loss_op_cross, global_step=tf.train.get_global_step())
#----------------------------------------------------------------------------------------------------------------------------   
    # Initializing the variables
    init = tf.global_variables_initializer()
    print(tf.trainable_variables())
    # Evaluate the accuracy of the model
    acc_op = tf.metrics.accuracy(labels=labels, predictions=pred_classes)

    # TF Estimators requires to return a EstimatorSpec, that specify
    # the different ops for training, evaluating, ...
    estim_specs = tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=pred_classes,
        loss=loss_op_cross,
        train_op=train_op_cross,
        eval_metric_ops={'accuracy': acc_op})

    return estim_specs

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [38]:
# Build the Estimator
model = tf.estimator.Estimator(model_fn_l2)

# Define the input function for training
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': mnist.train.images}, y=mnist.train.labels,
    batch_size=batch_size, num_epochs=None, shuffle=True)
# Train the Model
model.train(input_fn, steps=num_steps)

# Evaluate the Model
# Define the input function for evaluating
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': mnist.test.images}, y=mnist.test.labels,
    batch_size=batch_size, shuffle=False)
# Use the Estimator 'evaluate' method
e = model.evaluate(input_fn)

print("Testing Accuracy:", e['accuracy'])

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\FURRYM~1\\AppData\\Local\\Temp\\tmp_sygsgzl', '_tf_random_seed': 1, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000}
{'images': <tf.Tensor 'random_shuffle_queue_DequeueMany:1' shape=(128, 784) dtype=float32>}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\FURRYM~1\AppData\Local\Temp\tmp_sygsgzl\model.ckpt.
INFO:tensorflow:loss = 0.925339738418, step = 1
INFO:tensorflow:global_step/sec: 210.83
INFO:tensorflow:loss = 0.9006195596, step = 101 (0.476 sec)
INFO:tensorflow:global_step/sec: 214.009
INFO:tensorflow:loss = 0.900179887677, step = 201 (0.467 sec)
INFO:tensorflow:global_step/sec: 213.563
INFO:tensorflow:loss = 0.900181628176, step = 301 (0.468 sec)
INFO:tensorflow:global_step/sec: 193.47
INFO:tensorflow

In [9]:
# Build the Estimator for log loss function
model = tf.estimator.Estimator(model_fn_log)

# Define the input function for training
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': mnist.train.images}, y=mnist.train.labels,
    batch_size=batch_size, num_epochs=None, shuffle=True)
# Train the Model
model.train(input_fn, steps=num_steps)

# Evaluate the Model
# Define the input function for evaluating
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': mnist.test.images}, y=mnist.test.labels,
    batch_size=batch_size, shuffle=False)
# Use the Estimator 'evaluate' method
e = model.evaluate(input_fn)

print("Testing Accuracy:", e['accuracy'])

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\FURRYM~1\\AppData\\Local\\Temp\\tmpfj4hgfc2', '_tf_random_seed': 1, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\FURRYM~1\AppData\Local\Temp\tmpfj4hgfc2\model.ckpt.
INFO:tensorflow:loss = 0.338691, step = 1
INFO:tensorflow:global_step/sec: 191.062
INFO:tensorflow:loss = 0.0712233, step = 101 (0.525 sec)
INFO:tensorflow:global_step/sec: 183.672
INFO:tensorflow:loss = 0.0540964, step = 201 (0.544 sec)
INFO:tensorflow:global_step/sec: 122.523
INFO:tensorflow:loss = 0.0578944, step = 301 (0.816 sec)
INFO:tensorflow:global_step/sec: 119.157
INFO:tensorflow:loss = 0.038522, step = 401 (0.840 sec)
INFO:tensorflow:global_step/sec: 120.551
INFO:tensorflow:loss = 0.05

In [28]:
# Build the Estimator for Cross entropy loss function
model = tf.estimator.Estimator(model_fn_cross)

# Define the input function for training
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': mnist.train.images}, y=mnist.train.labels,
    batch_size=batch_size, num_epochs=None, shuffle=True)
# Train the Model
model.train(input_fn, steps=num_steps)

# Evaluate the Model
# Define the input function for evaluating
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': mnist.test.images}, y=mnist.test.labels,
    batch_size=batch_size, shuffle=False)
# Use the Estimator 'evaluate' method
e = model.evaluate(input_fn)

print("Testing Accuracy:", e['accuracy'])

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\FURRYM~1\\AppData\\Local\\Temp\\tmp_pul66z6', '_tf_random_seed': 1, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000}
[<tf.Variable 'dense/kernel:0' shape=(784, 256) dtype=float32_ref>, <tf.Variable 'dense/bias:0' shape=(256,) dtype=float32_ref>, <tf.Variable 'dense_1/kernel:0' shape=(256, 256) dtype=float32_ref>, <tf.Variable 'dense_1/bias:0' shape=(256,) dtype=float32_ref>, <tf.Variable 'dense_2/kernel:0' shape=(256, 10) dtype=float32_ref>, <tf.Variable 'dense_2/bias:0' shape=(10,) dtype=float32_ref>]
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\FURRYM~1\AppData\Local\Temp\tmp_pul66z6\model.ckpt.
INFO:tensorflow:loss = 2.30048, step = 1
INFO:tensorflow:global_step/sec: 200.069
INFO:tensorflow:loss = 0.452

We can see that the cross entropy and softmax with log-liklihood converged faster than squared loss. The cross entropy converged the fastest. We need a higher learning rate in order for the squared loss to converge in the given step, However,  if the learning rate was set too high the log-likelihood and cross entropy would throw a nan error ("ERROR:tensorflow:Model diverged with loss = NaN."). I looked it up and it was due to excessive learning rate. In the end, I made a compromise so that the squared loss would not get stuck while the other two loss functions still worked. 

In [57]:
learning_rate = 0.1
num_steps = 2000
batch_size = 128
display_step = 100

# Network Parameters
n_hidden_1 = 256 # 1st layer number of neurons
n_hidden_2 = 256 # 2nd layer number of neurons
num_input = 784 # MNIST data input (img shape: 28*28)
num_classes = 10 # MNIST total classes (0-9 digits)


def neural_net_dropout(x_dict):
    # TF Estimator input is a dict, in case of multiple inputs
    x = x_dict['images']
    # Hidden fully connected layer with 256 neurons
    layer_1 = tf.layers.dense(x, n_hidden_1)
    # Hidden fully connected layer with 256 neurons
    layer_2 = tf.layers.dense(layer_1, n_hidden_2)
    # Output fully connected layer with a neuron for each class
    layer_3 = tf.nn.dropout(layer_2, 0.7)
    out_layer = tf.layers.dense(layer_3, num_classes)
    return out_layer


def model_fn_dropout(features, labels, mode):
    # Build the neural network
    logits = neural_net_dropout(features)

    # Predictions
    pred_classes = tf.argmax(logits, axis=1)
    pred_probas = tf.nn.softmax(logits)

    # If prediction mode, early return
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)
#----------------------------------------------------------------------------------------------------------------------------
    #Here we used the cross entropy as loss function
    
    # Initializing the variables
    init = tf.global_variables_initializer()
  
    #cross entropy
    loss_op_cross = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits
    (logits=logits, labels=tf.cast(labels, dtype=tf.int32)))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op_cross, global_step=tf.train.get_global_step())
#----------------------------------------------------------------------------------------------------------------------------   

    # Evaluate the accuracy of the model
    acc_op = tf.metrics.accuracy(labels=labels, predictions=pred_classes)

    # TF Estimators requires to return a EstimatorSpec, that specify
    # the different ops for training, evaluating, ...
    estim_specs = tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=pred_classes,
        loss=loss_op_cross,
        train_op=train_op,
        eval_metric_ops={'accuracy': acc_op})

    return estim_specs

In [58]:
e_lis = []
# Build the Estimator
model_dropout = tf.estimator.Estimator(model_fn_dropout)

# Define the input function for training
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': mnist.train.images}, y=mnist.train.labels,
    batch_size=batch_size, num_epochs=None, shuffle=True)
# Train the Model
model_dropout.train(input_fn, steps=num_steps)

# Evaluate the Model
# Define the input function for evaluating
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': mnist.test.images}, y=mnist.test.labels,
    batch_size=batch_size, shuffle=False)
# Use the Estimator 'evaluate' method
e_drop = model_dropout.evaluate(input_fn)

print("Testing Accuracy:", e_drop['accuracy'])

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\FURRYM~1\\AppData\\Local\\Temp\\tmp7kwuqvdx', '_tf_random_seed': 1, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\FURRYM~1\AppData\Local\Temp\tmp7kwuqvdx\model.ckpt.
INFO:tensorflow:loss = 2.45908, step = 1
INFO:tensorflow:global_step/sec: 198.987
INFO:tensorflow:loss = 0.497305, step = 101 (0.504 sec)
INFO:tensorflow:global_step/sec: 202.712
INFO:tensorflow:loss = 0.454024, step = 201 (0.494 sec)
INFO:tensorflow:global_step/sec: 200.672
INFO:tensorflow:loss = 0.2435, step = 301 (0.498 sec)
INFO:tensorflow:global_step/sec: 193.657
INFO:tensorflow:loss = 0.503302, step = 401 (0.516 sec)
INFO:tensorflow:global_step/sec: 190.865
INFO:tensorflow:loss = 0.495814, 

In [59]:
def neural_net_l1(x_dict):
    # TF Estimator input is a dict, in case of multiple inputs
    x = x_dict['images']
    # Hidden fully connected layer with 256 neurons
    layer_1 = tf.layers.dense(x, n_hidden_1)
    # Hidden fully connected layer with 256 neurons
    layer_2 = tf.layers.dense(layer_1, n_hidden_2)
    # Output fully connected layer with a neuron for each class
    
    out_layer = tf.layers.dense(layer_2, num_classes)
    return out_layer


def model_fn_l1(features, labels, mode):
    # Build the neural network
    logits = neural_net_l1(features)

    # Predictions
    pred_classes = tf.argmax(logits, axis=1)
    pred_probas = tf.nn.softmax(logits)

    # If prediction mode, early return
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)
#----------------------------------------------------------------------------------------------------------------------------
    # TODO: IMPLEMENT THIS FUNCTION
    # Define loss and optimizer
    # Compare the use of squared loss, cross entropy loss, and softmax with log-likelihood 

    #Squared loss
    loss_op_l2 = tf.reduce_mean(tf.nn.l2_loss(tf.cast(labels, dtype=tf.float32)))    
    
    #log-likelihood 
    loss_op_log = tf.reduce_mean(tf.nn.log_softmax(logits = logits))              
                                
    #cross entropy
    loss_op_cross = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits
    (logits=logits, labels=tf.cast(labels, dtype=tf.int32)))
    
    l1_regularize = tf.contrib.layers.l1_regularizer(scale=0.005, scope=None)
    weights = tf.trainable_variables()
    l1_penalty = tf.contrib.layers.apply_regularization(l1_regularize, weights)
    total_loss = loss_op_cross + l1_penalty
    
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(total_loss, global_step=tf.train.get_global_step())
#----------------------------------------------------------------------------------------------------------------------------   
    # Initializing the variables
    init = tf.global_variables_initializer()

    # Evaluate the accuracy of the model
    acc_op = tf.metrics.accuracy(labels=labels, predictions=pred_classes)

    # TF Estimators requires to return a EstimatorSpec, that specify
    # the different ops for training, evaluating, ...
    estim_specs = tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=pred_classes,
        loss=loss_op_cross,
        train_op=train_op,
        eval_metric_ops={'accuracy': acc_op})

    return estim_specs

In [60]:
# Build the Estimator
model_l1 = tf.estimator.Estimator(model_fn_l1)

# Define the input function for training
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': mnist.train.images}, y=mnist.train.labels,
    batch_size=batch_size, num_epochs=None, shuffle=True)
# Train the Model
model_l1.train(input_fn, steps=num_steps)

# Evaluate the Model
# Define the input function for evaluating
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': mnist.test.images}, y=mnist.test.labels,
    batch_size=batch_size, shuffle=False)
# Use the Estimator 'evaluate' method
e_l1 = model_l1.evaluate(input_fn)

print("Testing Accuracy:", e_l1['accuracy'])

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\FURRYM~1\\AppData\\Local\\Temp\\tmpaioc4711', '_tf_random_seed': 1, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\FURRYM~1\AppData\Local\Temp\tmpaioc4711\model.ckpt.
INFO:tensorflow:loss = 2.32994, step = 1
INFO:tensorflow:global_step/sec: 191.981
INFO:tensorflow:loss = 0.985597, step = 101 (0.522 sec)
INFO:tensorflow:global_step/sec: 188.89
INFO:tensorflow:loss = 1.07709, step = 201 (0.530 sec)
INFO:tensorflow:global_step/sec: 202.712
INFO:tensorflow:loss = 0.831151, step = 301 (0.494 sec)
INFO:tensorflow:global_step/sec: 199.868
INFO:tensorflow:loss = 0.724401, step = 401 (0.500 sec)
INFO:tensorflow:global_step/sec: 202.918
INFO:tensorflow:loss = 0.604774, 

In [64]:
def neural_net_l2(x_dict):
    # TF Estimator input is a dict, in case of multiple inputs
    x = x_dict['images']
    # Hidden fully connected layer with 256 neurons
    layer_1 = tf.layers.dense(x, n_hidden_1)
    # Hidden fully connected layer with 256 neurons
    layer_2 = tf.layers.dense(layer_1, n_hidden_2)
    # Output fully connected layer with a neuron for each class
    
    out_layer = tf.layers.dense(layer_2, num_classes)
    return out_layer


def model_fn_l2(features, labels, mode):
    # Build the neural network
    logits = neural_net_l2(features)

    # Predictions
    pred_classes = tf.argmax(logits, axis=1)
    pred_probas = tf.nn.softmax(logits)

    # If prediction mode, early return
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)
#----------------------------------------------------------------------------------------------------------------------------
    # TODO: IMPLEMENT THIS FUNCTION
    # Define loss and optimizer
    # Compare the use of squared loss, cross entropy loss, and softmax with log-likelihood 

    #Squared loss
    loss_op_l2 = tf.reduce_mean(tf.nn.l2_loss(tf.cast(labels, dtype=tf.float32)))    
    
    #log-likelihood 
    loss_op_log = tf.reduce_mean(tf.nn.log_softmax(logits = logits))              
                                
    #cross entropy
    loss_op_cross = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits
    (logits=logits, labels=tf.cast(labels, dtype=tf.int32)))

    l2_regularize = tf.contrib.layers.l2_regularizer(scale=0.05, scope=None)
    weights = tf.trainable_variables()
    l2_penalty = tf.contrib.layers.apply_regularization(l2_regularize, weights)
    total_loss = loss_op_cross + l2_penalty
    
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(total_loss, global_step=tf.train.get_global_step())
#----------------------------------------------------------------------------------------------------------------------------   
    # Initializing the variables
    init = tf.global_variables_initializer()

    # Evaluate the accuracy of the model
    acc_op = tf.metrics.accuracy(labels=labels, predictions=pred_classes)

    # TF Estimators requires to return a EstimatorSpec, that specify
    # the different ops for training, evaluating, ...
    estim_specs = tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=pred_classes,
        loss=loss_op_cross,
        train_op=train_op,
        eval_metric_ops={'accuracy': acc_op})

    return estim_specs

In [65]:
# Build the Estimator
model_l2 = tf.estimator.Estimator(model_fn_l2)

# Define the input function for training
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': mnist.train.images}, y=mnist.train.labels,
    batch_size=batch_size, num_epochs=None, shuffle=True)
# Train the Model
model_l2.train(input_fn, steps=num_steps)

# Evaluate the Model
# Define the input function for evaluating
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': mnist.test.images}, y=mnist.test.labels,
    batch_size=batch_size, shuffle=False)
# Use the Estimator 'evaluate' method
e_l2 = model_l2.evaluate(input_fn)

print("Testing Accuracy:", e_l2['accuracy'])

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\FURRYM~1\\AppData\\Local\\Temp\\tmpg9shc2my', '_tf_random_seed': 1, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\FURRYM~1\AppData\Local\Temp\tmpg9shc2my\model.ckpt.
INFO:tensorflow:loss = 2.29429, step = 1
INFO:tensorflow:global_step/sec: 186.049
INFO:tensorflow:loss = 0.851012, step = 101 (0.540 sec)
INFO:tensorflow:global_step/sec: 186.943
INFO:tensorflow:loss = 0.948308, step = 201 (0.533 sec)
INFO:tensorflow:global_step/sec: 189.97
INFO:tensorflow:loss = 0.821229, step = 301 (0.527 sec)
INFO:tensorflow:global_step/sec: 191.428
INFO:tensorflow:loss = 0.870117, step = 401 (0.522 sec)
INFO:tensorflow:global_step/sec: 205.637
INFO:tensorflow:loss = 0.971958,

In [63]:
print(e_l1['accuracy'])
print(e_l2['accuracy'])
print(e_drop['accuracy'])


0.8463
0.8671
0.9149


For the dropout layer I used 70% keep probability for each elements. For both L1 and L2 regularizations, I used 0.05 scaling factor. 
We can see that the dropout regularizations performed the best. The final results were pretty sensitive to the parameters. I tried scaling factor 0.01 and 0.1. The performance results differed. 