-  **Leaky/Parametric ReLU**
-  **Batch Normalization**

In [1]:
# Imports
import numpy as np
import tensorflow as tf

%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt



In [2]:
# MNIST data: read the dataset from 'MNIST_data/' with one-hot encoded labels
from tensorflow.examples.tutorials.mnist import input_data as data
mnist = data.read_data_sets('MNIST_data/', one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
# Placeholders for the flattened input images and their one-hot labels.
# The leading None dimension leaves the mini-batch size unspecified.
n_input = mnist.train.images.shape[1]
n_output = 10
x = tf.placeholder(dtype=tf.float32, shape=[None, n_input])
y = tf.placeholder(dtype=tf.float32, shape=[None, n_output])

In [4]:
# Boolean placeholder fed at run time: True while training, False when evaluating
train_phase = tf.placeholder(dtype=tf.bool, name='train_phase')

In [5]:
# The convolution layers need a 4D tensor [N, H, W, C],
# but x is 2D: [batch, height * width]

# Side length of one image; assumes the images are square (height == width)
n_pixels = mnist.train.images.shape[1]
side = int(np.sqrt(n_pixels))

# A -1 entry asks tf.reshape to infer that dimension so the total size
# stays constant (CHECK: help(tf.reshape))
x_tensor = tf.reshape(x, shape=[-1, side, side, 1])

###  ReLU and  PReLU  
-  [Python Implementation](http://gforge.se/2015/06/benchmarking-relu-and-prelu/)

In [6]:
# Leaky ReLU: allows a small non-zero gradient when the unit is not active
def leaky_relu(x, leak=0.2, name='lrelu'):
    """
    Leaky ReLU non-linearity (NOTE: requires `import tensorflow as tf`).

    Evaluates 0.5 * (1 + leak) * x + 0.5 * (1 - leak) * |x|, which equals
    x for x >= 0 and leak * x for x < 0.

    Arguments:
        x: Tensor
        leak: Leakage parameter (float), the slope applied to negative inputs
        name: Variable scope
    Returns:
        Tensor output of the non-linearity
    """
    # Plain Python floats, so computing them outside the scope adds no graph ops
    linear_coeff = 0.5 * (1 + leak)
    abs_coeff = 0.5 * (1 - leak)
    with tf.variable_scope(name):
        return linear_coeff * x + abs_coeff * abs(x)

###  Batch Normalization
- Reduces internal covariate shift by normalizing each layer's activations over the mini-batch using the batch mean and variance
- [Batch Normalization in TensorFlow](https://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow)

In [7]:
# Batch Normalization (Source: StackOverflow Batch Normalization in TensorFlow)
def batch_norm(x, phase_train, scope='bn', affine=True):
    """
    Convolutional layer batch normalization (NOTE: import tensorflow)

    When `phase_train` is true the moments of the current batch are used for
    normalization and are also folded into exponential moving averages;
    otherwise the stored moving averages are used.

    Arguments:
        x: 4D Tensor (B x H x W x C)
        phase_train: Boolean TF tensor (true indicates training phase)
        scope: String, Variable scope
        affine: Boolean, whether gamma is trainable and applied as a scale
                after normalization (beta is always created as trainable)
    Output:
        Batch normalized output
    """
    with tf.variable_scope(scope):

        # Number of channels = last entry of the static tensor shape
        n_channels = x.get_shape().as_list()[-1]

        # Batch Norm: Beta (offset), one value per channel, initialized to 0
        beta = tf.Variable(tf.constant(0.0, shape=[n_channels]),
                           name='beta', trainable=True)

        # Batch Norm: Gamma (scale), one value per channel, initialized to 1
        gamma = tf.Variable(tf.constant(1.0, shape=[n_channels]),
                            name='gamma', trainable=affine)

        # Batch: Mean and Variance, reduced over axes 0, 1 and 2 (B, H, W)
        # [NOTE: tf.nn.moments calculates tensor mean and var. USE: help(tf.nn.moments)]
        batch_mean, batch_var = tf.nn.moments(x, axes=[0, 1, 2], name='moments')

        # Exponential Moving Average
        # NOTE: help(tf.train.ExponentialMovingAverage)
        # The averages are only queried after EMA.apply() has registered the
        # moments (tf.cond builds the update branch first), so no lookup is
        # made before that point.
        EMA = tf.train.ExponentialMovingAverage(decay=0.9)

        def mean_var_with_update():
            # Force the moving-average update to run whenever the batch
            # moments are consumed in the training branch.
            ema_apply_op = EMA.apply([batch_mean, batch_var])
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = tf.cond(phase_train,
                            mean_var_with_update,
                            lambda: (EMA.average(batch_mean), EMA.average(batch_var)))

        norm = tf.nn.batch_norm_with_global_normalization(x, mean, var, beta, gamma, 1e-3, affine)
    return norm

In [8]:
# Fully Connected network
def fc(x, n_units, scope=None, stddev=0.02, activation=lambda x: x):
    """
    Fully connected layer without a bias term.
    Arguments:
        x: 2D Tensor (batch x features)
        n_units: Number of output units
        scope: Variable scope to use ('Linear' when not given)
        stddev: Std. Deviation of the normal weight initialization
        activation: Function applied to the linear output (identity by default)
    Returns:
        Tensor: activation applied to matmul(x, Matrix)
    """
    n_inputs = x.get_shape().as_list()[1]
    scope_name = scope or 'Linear'
    with tf.variable_scope(scope_name):
        weights = tf.get_variable(name='Matrix',
                                  shape=[n_inputs, n_units],
                                  dtype=tf.float32,
                                  initializer=tf.random_normal_initializer(stddev=stddev))
        linear = tf.matmul(x, weights)
        return activation(linear)

In [9]:
# Convolution 2D
def conv2d(x, n_filters, filter_w=5, filter_h=5, stride_w=2, stride_h=2,
           stddev=0.02, activation=None, bias=True, padding='SAME', name='Conv2D'):
    """
    2D Convolution
    Arguments:
        x: 4D Tensor (N x H x W x C)
        n_filters: Number of filters to apply
        filter_w: Filter width
        filter_h: Filter height
        stride_w: Stride in cols
        stride_h: Stride in rows
        stddev: Std. Deviation of initialization
        activation: Non-linearity function (None applies no non-linearity)
        bias: Boolean, add a learned per-filter bias to the convolution output
        padding: 'SAME' or 'VALID'
        name: Variable scope to use
    output:
        x: Tensor (Convolved)
    """
    with tf.variable_scope(name):
        # tf.nn.conv2d expects the filter as
        # [filter_height, filter_width, in_channels, out_channels]
        W = tf.get_variable('W',
                            [filter_h, filter_w, x.get_shape()[-1], n_filters],
                            initializer=tf.truncated_normal_initializer(stddev=stddev))
        # Strides follow the input layout [N, H, W, C]: rows (height) first,
        # then cols (width)
        conv = tf.nn.conv2d(x, W, strides=[1, stride_h, stride_w, 1], padding=padding)
        if bias:
            b = tf.get_variable('b', [n_filters],
                                initializer=tf.truncated_normal_initializer(stddev=stddev))
            conv = tf.nn.bias_add(conv, b)
        if activation is not None:
            conv = activation(conv)
        return conv

### CNN

#### Convolution layers

In [10]:
# Three stacked blocks, each: convolution -> batch normalization -> leaky ReLU
def _conv_block(inputs, n_filters, index):
    """Build convolution block `index`; return its (conv, batch-norm, activation) tensors."""
    conv = conv2d(inputs, n_filters=n_filters, name='conv_{}'.format(index))
    bn = batch_norm(conv, phase_train=train_phase, scope='bn_{}'.format(index))
    act = leaky_relu(bn, name='lrelu_{}'.format(index))
    return conv, bn, act

# Convolution layer-1
conv_1, bn_1, a_conv_1 = _conv_block(x_tensor, n_filters=16, index=1)

# Convolution layer-2
conv_2, bn_2, a_conv_2 = _conv_block(a_conv_1, n_filters=8, index=2)

# Convolution layer-3
conv_3, bn_3, a_conv_3 = _conv_block(a_conv_2, n_filters=4, index=3)

In [11]:
# Convolution layer: Dimension check
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# unlike the Python-2-only print statement.
conv_layers = [(conv_1, bn_1, a_conv_1),
               (conv_2, bn_2, a_conv_2),
               (conv_3, bn_3, a_conv_3)]
for layer_idx, layer_tensors in enumerate(conv_layers, start=1):
    print('Convolution Layer-{}'.format(layer_idx))
    # Shapes of the convolution, batch-norm and activation outputs, in that order
    for layer_tensor in layer_tensors:
        print(layer_tensor.get_shape())

Convolution Layer-1
(?, 14, 14, 16)
(?, 14, 14, 16)
(?, 14, 14, 16)
Convolution Layer-2
(?, 7, 7, 8)
(?, 7, 7, 8)
(?, 7, 7, 8)
Convolution Layer-3
(?, 4, 4, 4)
(?, 4, 4, 4)
(?, 4, 4, 4)


#### Fully Connected layer
- NOTE: Print the dimensions of the last convolution layer to determine the flattened size used in the reshape

In [12]:
# Reshape activation output of third convolution layer to connect to a fully connected layer.
# The flattened size is derived from the static shape (H * W * C) instead of being
# hard-coded, so it follows any change to the convolution stack.
# For Convolution Layer-3 Shape (?, 4, 4, 4) this is 4 * 4 * 4 = 64.
n_flat = int(np.prod(a_conv_3.get_shape().as_list()[1:]))
a_conv_3_flat = tf.reshape(a_conv_3, [-1, n_flat])
fc_4 = fc(a_conv_3_flat, n_output)
y_pred = tf.nn.softmax(fc_4)

In [13]:
# Fully Connected layer: Dimension check
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print('Fully Connected Layer-4')
for fc_tensor in (a_conv_3_flat, fc_4, y_pred):
    print(fc_tensor.get_shape())

Fully Connected Layer-4
(?, 64)
(?, 10)
(?, 10)


#### Cross Entropy Loss

In [14]:
# Loss: cross entropy summed over the batch.
# It is computed from the logits (fc_4) instead of -sum(y * log(y_pred)):
# once a predicted probability underflows to 0, log(0) = -inf and
# 0 * -inf = NaN, which would poison the loss and every gradient.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(logits=fc_4, labels=y))

# Optimizer
optimizer = tf.train.AdamOptimizer().minimize(cross_entropy)

In [15]:
# Accuracy: fraction of samples whose predicted class equals the labelled class
y_hat = tf.argmax(y_pred, 1)   # index of the highest predicted probability
y_true = tf.argmax(y, 1)       # index of the 1 in the one-hot label
correct_pred = tf.equal(y_hat, y_true)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [16]:
# Open a session in which the graph is executed
sess = tf.Session()

# Build the initializer op and run it so all variables get their initial values
init_op = tf.initialize_all_variables()
sess.run(init_op)

In [18]:
# Train model
batch_size = 100
n_epochs = 5
batches = mnist.train.num_examples // batch_size


def evaluate(dataset):
    """Return the accuracy on `dataset` (its images/labels) with train_phase fed as False."""
    return sess.run(accuracy, feed_dict={x: dataset.images,
                                         y: dataset.labels,
                                         train_phase: False})


for epoch in range(n_epochs):
    # One pass over the training set in mini-batches
    for batch in range(batches):
        batch_x, batch_y = mnist.train.next_batch(batch_size=batch_size)
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, train_phase: True})
    acc = evaluate(mnist.validation)
    # print(...) with a single argument behaves the same on Python 2 and Python 3
    print('Epoch-{} Validation Accuracy: {}'.format(epoch, acc))

# str() plus concatenation reproduces the text of the former
# `print 'Test Accuracy: ', value` statement (label, separating space, str(value))
print('Test Accuracy:  ' + str(evaluate(mnist.test)))

Epoch-0 Validation Accuracy: 0.959599971771
Epoch-1 Validation Accuracy: 0.977199971676
Epoch-2 Validation Accuracy: 0.973800003529
Epoch-3 Validation Accuracy: 0.978399991989
Epoch-4 Validation Accuracy: 0.979799985886
Test Accuracy:  0.9825


In [17]:
# Exploratory lookup: show the built-in documentation for tf.variable_scope,
# the context manager used by the layer helpers defined earlier in this notebook
help(tf.variable_scope)

Help on function variable_scope in module tensorflow.python.ops.variable_scope:

variable_scope(*args, **kwds)
    Returns a context manager for defining ops that creates variables (layers).
    
    This context manager validates that the (optional) `values` are from
    the same graph, ensures that graph is the default graph, and pushes a
    name scope and a variable scope.
    
    If `name_or_scope` is not None, it is used as is. If `scope` is None, then
    `default_name` is used.  In that case, if the same name has been previously
    used in the same scope, it will made unique be appending `_N` to it.
    
    Variable scope allows to create new variables and to share already created
    ones while providing checks to not create or share by accident. For details,
    see the [Variable Scope How To](../../how_tos/variable_scope/index.md),
    here we present only a few basic examples.
    
    Simple example of how to create a new variable:
    
    ```python
    with tf.variabl