# Playing with convolutions in TensorFlow
TensorFlow has great support for convolutional layers. The most popular one is tf.nn.conv2d.
```python
tf.nn.conv2d(input, filter, strides, padding, use_cudnn_on_gpu=None, data_format=None, name=None)
Input: Batch size x Height x Width x Channels
Filter: Height x Width x Input Channels x Output Channels (e.g. [5, 5, 3, 64])
Strides: 4 element 1-D tensor, strides in each direction (often [1, 1, 1, 1] or [1, 2, 2, 1])
Padding: 'SAME' or 'VALID'
Data_format: default to NHWC
```
![convolution operation effect](./pic/convolution.png)
There are also several other built-in convolutional operations.
## Convnet on MNIST
-----
For MNIST, we will be using two convolutional layers, each followed by a ReLU and a max-pool layer, and one fully connected layer.
![mnist convolution version structure](./pic/mnist_convolution_version.png)
### Variable scope
Since we’ll be dealing with multiple layers, it’s important to introduce variable scope. Think of a variable scope as something similar to a namespace. A variable named ‘weights’ in variable scope ‘conv1’ will become ‘conv1/weights’. The common practice is to create a variable scope for each layer, so that if you have a variable ‘weights’ in both convolution layer 1 and convolution layer 2, there won’t be any name clash.
In a variable scope, we don’t create variables using tf.Variable, but instead use tf.get_variable()
```python
tf.get_variable(<name>, <shape>, <initializer>)
```
If a variable with that name already exists in that variable scope and the scope allows reuse (for example, it was opened with reuse=True), we use that variable. If a variable with that name doesn’t already exist in that variable scope, TensorFlow creates a new variable. This setup makes it really easy to share variables across the architecture. This will come in extremely handy when you build complex models and you need to share large sets of variables. Variable scopes help you initialize all of them in one place.
Nodes in the same variable scope will be grouped together, and therefore you don’t have to use name scope any more. To declare a variable scope, you do it the same way you do name scope:
```python
with tf.variable_scope('conv1') as scope:
```
For example:
```python
with tf.variable_scope('conv1') as scope:
    w = tf.get_variable('weights', [5, 5, 1, 32])
    b = tf.get_variable('biases', [32], initializer=tf.random_normal_initializer())
    conv = tf.nn.conv2d(images, w, strides=[1, 1, 1, 1], padding='SAME')
    conv1 = tf.nn.relu(conv + b, name=scope.name)
with tf.variable_scope('conv2') as scope:
    w = tf.get_variable('weights', [5, 5, 32, 64])
    b = tf.get_variable('biases', [64], initializer=tf.random_normal_initializer())
    conv = tf.nn.conv2d(conv1, w, strides=[1, 1, 1, 1], padding='SAME')
    conv2 = tf.nn.relu(conv + b, name=scope.name)
```

 Please refer to the [official documentation](https://www.tensorflow.org/api_docs/) for more information.
 
 ## Code
 ----
The code below is kept simple to help you understand TensorFlow's convolution interfaces. Training costs too much on a CPU, even for one epoch...

In [1]:
from __future__ import division
import os
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Hyper-parameters passed to MnistConvnet by the driver code below.
LEARNING_RATE = 0.01
BATCH_SIZE = 128
N_EPOCHS = 1  # 25 for a full run; kept at 1 because training is slow on CPU

# Absolute path of the current working directory; the MNIST data path is
# joined onto it when the data set is loaded.
WORK_DIR = os.path.abspath(os.curdir)

class MnistConvnet:
    """Small convolutional network for MNIST using the TensorFlow 1.x graph API.

    build_graph() assembles: conv 5x5 (32 filters) -> max-pool -> conv 5x5
    (64 filters) -> max-pool -> fully connected (1024 units) -> 10 logits,
    trained with plain gradient descent on softmax cross-entropy.
    train_model() runs the training epochs and then reports test accuracy.
    """

    def __init__(self, learning_rate, batch_size, n_epochs, data_path="./data/mnist"):
        """Store the hyper-parameters and load the MNIST data set.

        Args:
            learning_rate: step size handed to the gradient-descent optimizer.
            batch_size: number of examples per training/evaluation batch; also
                the fixed first dimension of the input placeholders.
            n_epochs: number of passes over the training set.
            data_path: directory (joined onto WORK_DIR) holding the MNIST files.
        """
        # Use the caller's values; hard-coding them here would silently ignore
        # whatever the caller asked for (e.g. a short one-epoch run).
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.data = input_data.read_data_sets(os.path.join(WORK_DIR, data_path), one_hot=True)

    @staticmethod
    def _num_batches(num_examples, batch_size):
        """Return how many complete batches of batch_size fit in num_examples."""
        return num_examples // batch_size

    # input shape defaults to NHWC (batch_size, height, width, channel)
    # kernel shape defaults to (height, width, input_channel, output_channel)
    def _create_filter_layer(self, layer_name, input_layer, kernel_shape, bias_shape,
                             stride_shape=(1, 1, 1, 1), padding='SAME'):
        """Build a conv2d + bias + ReLU layer inside its own variable scope.

        Args:
            layer_name: variable-scope name; also used as the ReLU op name.
            input_layer: input tensor fed to tf.nn.conv2d.
            kernel_shape: shape of the 'kernel' variable.
            bias_shape: shape of the 'biases' variable.
            stride_shape: conv2d strides, one entry per input dimension.
            padding: 'SAME' or 'VALID'.

        Returns:
            The ReLU activation tensor.
        """
        with tf.variable_scope(layer_name) as scope:
            # No initializer is given for the kernel, so TensorFlow's default applies.
            kernel = tf.get_variable('kernel', kernel_shape)
            biases = tf.get_variable('biases', bias_shape, initializer=tf.random_normal_initializer())
            conv = tf.nn.conv2d(input_layer, kernel, strides=list(stride_shape), padding=padding)
            return tf.nn.relu(conv + biases, name=scope.name)

    def _create_pool_layer(self, layer_name, input_layer, ksize, stride_shape=(1, 2, 2, 1), padding='SAME'):
        """Build a max-pooling layer inside its own variable scope.

        Args:
            layer_name: variable-scope name for the pooling op.
            input_layer: tensor to pool.
            ksize: pooling window size, one entry per input dimension.
            stride_shape: pooling strides, one entry per input dimension.
            padding: 'SAME' or 'VALID'.

        Returns:
            The pooled tensor.
        """
        with tf.variable_scope(layer_name):
            return tf.nn.max_pool(input_layer, ksize=ksize, strides=list(stride_shape), padding=padding)

    def _create_fc_layer(self, layer_name, input_layer, input_features, output_features=1024):
        """Build a fully connected ReLU layer.

        Args:
            layer_name: variable-scope name.
            input_layer: tensor to flatten to [-1, input_features].
            input_features: number of values per example after flattening.
            output_features: number of hidden units (1024 by default).

        Returns:
            The ReLU activation tensor of shape [-1, output_features].
        """
        with tf.variable_scope(layer_name):
            weights = tf.get_variable('weights', [input_features, output_features],
                                      initializer=tf.truncated_normal_initializer())
            biases = tf.get_variable('biases', [output_features],
                                     initializer=tf.constant_initializer(0.0))

            # Flatten everything except the batch dimension before the matmul.
            flat_input = tf.reshape(input_layer, [-1, input_features])
            return tf.nn.relu(tf.matmul(flat_input, weights) + biases, name='relu')

    def _create_placeholder(self, batch_size):
        """Create the input and label placeholders.

        Args:
            batch_size: fixed first dimension of both placeholders.

        Returns:
            (X, Y): float32 placeholders of shape [batch_size, 784] and
            [batch_size, 10].
        """
        with tf.name_scope("data"):
            X = tf.placeholder(tf.float32, [batch_size, 784], name="input")
            # The op name "lables" is kept as-is so existing graph names do not change.
            Y = tf.placeholder(tf.float32, [batch_size, 10], name="lables")
            return X, Y

    def _create_softmax(self, input_layer, layer_name='softmax', input_features=1024, n_classes=10):
        """Build the final linear layer that produces the class logits.

        Args:
            input_layer: 2-D tensor with input_features columns.
            layer_name: variable-scope name.
            input_features: width of input_layer (1024 by default).
            n_classes: number of output logits (10 by default).

        Returns:
            The unnormalised logits; softmax itself is applied by the loss
            and evaluation ops.
        """
        with tf.variable_scope(layer_name):
            weights = tf.get_variable('weights', [input_features, n_classes],
                                      initializer=tf.truncated_normal_initializer())
            biases = tf.get_variable('biases', [n_classes],
                                     initializer=tf.random_normal_initializer())
            return tf.matmul(input_layer, weights) + biases

    def _create_loss(self, logits, labels, layer_name='loss'):
        """Return the mean softmax cross-entropy between logits and labels.

        Args:
            logits: unnormalised class scores.
            labels: target distribution with the same shape as logits.
            layer_name: name scope for the loss ops.
        """
        with tf.name_scope(layer_name):
            entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
            return tf.reduce_mean(entropy)

    def _create_eval(self, logits, labels, layer_name='eval'):
        """Return an op that counts the correctly classified examples in a batch."""
        with tf.name_scope(layer_name):
            preds = tf.nn.softmax(logits)
            correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(labels, 1))
            return tf.reduce_sum(tf.cast(correct_preds, tf.float32))

    def build_graph(self):
        """Assemble the network, loss, optimizer and evaluation ops.

        Sets self.X, self.Y, self.logits, self.loss, self.optimizer and
        self.correct_count.
        """
        self.X, self.Y = self._create_placeholder(self.batch_size)
        # Flat 784-value rows become 28x28 single-channel images; -1 lets
        # TensorFlow work out the batch dimension.
        images = tf.reshape(self.X, shape=[-1, 28, 28, 1])

        conv1 = self._create_filter_layer(layer_name='conv1',
                                          input_layer=images,
                                          kernel_shape=[5, 5, 1, 32],
                                          bias_shape=[32])
        pool1 = self._create_pool_layer(layer_name='pool1', input_layer=conv1, ksize=[1, 2, 2, 1])

        conv2 = self._create_filter_layer(layer_name='conv2',
                                          input_layer=pool1,
                                          kernel_shape=[5, 5, 32, 64],
                                          bias_shape=[64])
        pool2 = self._create_pool_layer(layer_name='pool2', input_layer=conv2, ksize=[1, 2, 2, 1])

        # Two stride-2 'SAME' poolings shrink 28x28 to 7x7, with 64 channels.
        fc = self._create_fc_layer(layer_name='fc', input_layer=pool2, input_features=7 * 7 * 64)
        self.logits = self._create_softmax(input_layer=fc)
        self.loss = self._create_loss(logits=self.logits, labels=self.Y)
        self.optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=self.learning_rate).minimize(self.loss)
        # Built once here so the evaluation loop does not add ops to the
        # graph on every iteration.
        self.correct_count = self._create_eval(logits=self.logits, labels=self.Y)

    def train_model(self):
        """Train for n_epochs, then print the final loss and the test accuracy.

        build_graph() must have been called first. Only complete batches are
        used because the placeholders have a fixed batch dimension.
        """
        init = tf.global_variables_initializer()
        with tf.Session() as sess:
            sess.run(init)

            n_batches = self._num_batches(self.data.train.num_examples, self.batch_size)
            loss_r = None  # stays None when no training step runs
            for _ in range(self.n_epochs):
                for _ in range(n_batches):
                    X_batch, Y_batch = self.data.train.next_batch(self.batch_size)
                    _, loss_r = sess.run([self.optimizer, self.loss],
                                         feed_dict={self.X: X_batch, self.Y: Y_batch})

            print("n_batches=%r train_num=%r loss=%r" % (n_batches, self.data.train.num_examples, loss_r))

            # Size the evaluation loop by the TEST set: using the training
            # batch count would re-count test examples and push the reported
            # accuracy above 1.
            n_test_batches = self._num_batches(self.data.test.num_examples, self.batch_size)
            total_correct_preds = 0
            for _ in range(n_test_batches):
                X_batch, Y_batch = self.data.test.next_batch(self.batch_size)
                total_correct_preds += sess.run(self.correct_count,
                                                feed_dict={self.X: X_batch, self.Y: Y_batch})

            # Divide by the number of examples actually evaluated.
            n_evaluated = n_test_batches * self.batch_size
            accuracy = float(total_correct_preds) / n_evaluated if n_evaluated else 0.0
            print("total_correct_preds=%r Accuracy=%r" % (total_correct_preds, accuracy))


# Build the model graph with the module-level hyper-parameters.
model = MnistConvnet(
    learning_rate=LEARNING_RATE,
    batch_size=BATCH_SIZE,
    n_epochs=N_EPOCHS,
)
model.build_graph()
# Training is left disabled here; uncomment the call below to run it.
#model.train_model()

Extracting C:\Users\ziaz\Downloads\Github\CS20SI-Tensorflow-for-Deep-Learning-Research\./data/mnist\train-images-idx3-ubyte.gz
Extracting C:\Users\ziaz\Downloads\Github\CS20SI-Tensorflow-for-Deep-Learning-Research\./data/mnist\train-labels-idx1-ubyte.gz
Extracting C:\Users\ziaz\Downloads\Github\CS20SI-Tensorflow-for-Deep-Learning-Research\./data/mnist\t10k-images-idx3-ubyte.gz
Extracting C:\Users\ziaz\Downloads\Github\CS20SI-Tensorflow-for-Deep-Learning-Research\./data/mnist\t10k-labels-idx1-ubyte.gz
