In [1]:
import numpy as np
import tensorflow as tf
import cifar
import matplotlib.pyplot as plt

## Download and extract the dataset

In [2]:
cifar.prepare_cifar_10()
cifar10_labels = cifar.cifar10_labels()

## Define the model function
Here we have defined a common model function which runs for both batch_normalized and non-batch_normalized networks.

#### Batch Norm before or after activation
This is a long-standing debate about Batch Normalization. To put batch norm before or after activation. I have chosen to put it after the activation. And more specifically, at the input of each layer. This makes more sense, since batch norm is introduced to reduce the covariant shift in input of each layer.

In [3]:
def model(features, labels, mode, params):
    layer = features['images']
    if params["print_shapes"]:
        print(layer.shape)

    for filt, kern, stride in zip(params['filters'], params['kern'], params['strides']):
        if params['with_bn']:
            layer = tf.layers.batch_normalization(
                layer, training=mode == tf.estimator.ModeKeys.TRAIN)
        layer = tf.layers.conv2d(
            layer, filt, kern, stride, activation=tf.nn.relu)
        if params["print_shapes"]: 
            print(layer.shape)

    layer = tf.layers.flatten(layer)
    if params["print_shapes"]:
        print(layer.shape)

    for units in params['dense']:
        if params['with_bn']:
            layer = tf.layers.batch_normalization(
                layer, training=mode == tf.estimator.ModeKeys.TRAIN)
        layer = tf.layers.dense(
            layer, units, activation=tf.nn.relu)
        if params["print_shapes"]:
            print(layer.shape)

    if params['with_bn']:
        layer = tf.layers.batch_normalization(
            layer, training=mode == tf.estimator.ModeKeys.TRAIN)

    logits = tf.layers.dense(layer, 10)
    if params["print_shapes"]:
        print(logits.shape)
    cls = tf.argmax(logits, -1)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions={
            "class": cls,
            "score": tf.nn.softmax(logits)
        })

    loss = tf.losses.sparse_softmax_cross_entropy(labels, logits)

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops={
            "accuracy": tf.metrics.accuracy(labels, cls)
        })

    opt = tf.train.AdamOptimizer().minimize(
        loss, global_step=tf.train.get_global_step())

    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=opt)

In [4]:
def inp_fn():
    return tf.data.Dataset.from_generator(cifar.cifar10_train,({"images":tf.float32},tf.int64),
                                         ({"images":tf.TensorShape([None,32,32,3])},tf.TensorShape(None)))

def test_inp_fn():
    return tf.data.Dataset.from_generator(cifar.cifar10_test,({"images":tf.float32},tf.int64),
                                         ({"images":tf.TensorShape([None,32,32,3])},tf.TensorShape(None)))

## Without BatchNorm (no dropout)

In [5]:
wobn_params = {
    "filters": [30, 50,60],
    "kern": [[3, 3]]*3,
    "strides": [[2,2],[1,1],[1,1]],
    "dense": [3500,700],
    "with_bn": False,
    "print_shapes": True
}

### View layer shapes

In [6]:
model({"images":tf.placeholder(tf.float32,(10,32,32,3))},
      tf.placeholder(tf.int32,(10)),tf.estimator.ModeKeys.TRAIN,wobn_params)
wobn_params["print_shapes"] = False

(10, 32, 32, 3)
(10, 15, 15, 30)
(10, 13, 13, 50)
(10, 11, 11, 60)
(10, 7260)
(10, 3500)
(10, 700)
(10, 10)


In [7]:
wobn = tf.estimator.Estimator(model, 'wobn-ckpts', config=tf.estimator.RunConfig(save_summary_steps=2),
                              params=wobn_params)

INFO:tensorflow:Using config: {'_model_dir': 'wobn-ckpts', '_tf_random_seed': None, '_save_summary_steps': 10, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x00000241B1C1F358>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


Run tensorboard with:
```
tensorboard --logdir wobn-ckpts
```
in current directory

In [9]:
for i in range(20):
    wobn.train(inp_fn)
    wobn.evaluate(test_inp_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from wobn-ckpts\model.ckpt-200
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 201 into wobn-ckpts\model.ckpt.
INFO:tensorflow:loss = 0.5700029, step = 200
INFO:tensorflow:Saving checkpoints for 220 into wobn-ckpts\model.ckpt.
INFO:tensorflow:Loss for final step: 0.5607767.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-05-26-11:21:53
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from wobn-ckpts\model.ckpt-220
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-05-26-11:21:56
INFO:tensorflow:Saving dict for global step 220: accuracy = 0.6369, global_step =

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from wobn-ckpts\model.ckpt-360
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 361 into wobn-ckpts\model.ckpt.
INFO:tensorflow:loss = 0.12534046, step = 360
INFO:tensorflow:Saving checkpoints for 380 into wobn-ckpts\model.ckpt.
INFO:tensorflow:Loss for final step: 0.41326514.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-05-26-11:27:28
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from wobn-ckpts\model.ckpt-380
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-05-26-11:27:30
INFO:tensorflow:Saving dict for global step 380: accuracy = 0.6097, global_step

## With BatchNorm

In [10]:
wbn_params = {
    "filters": [30, 50,60],
    "kern": [[3, 3]]*3,
    "strides": [[2,2],[1,1],[1,1]],
    "dense": [3500,700],
    "with_bn": True,
    "print_shapes": True
}

### View layer shapes

In [11]:
model({"images":tf.placeholder(tf.float32,(10,32,32,3))},
      tf.placeholder(tf.int32,(10)),tf.estimator.ModeKeys.TRAIN,wbn_params)
wbn_params["print_shapes"] = False

(10, 32, 32, 3)
(10, 15, 15, 30)
(10, 13, 13, 50)
(10, 11, 11, 60)
(10, 7260)
(10, 3500)
(10, 700)
(10, 10)


In [12]:
wbn = tf.estimator.Estimator(model, 'wbn-ckpts', config=tf.estimator.RunConfig(save_summary_steps=2),
                              params=wbn_params)

INFO:tensorflow:Using config: {'_model_dir': 'wbn-ckpts', '_tf_random_seed': None, '_save_summary_steps': 10, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x00000241B2313048>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


Run tensorboard with:
```
tensorboard --logdir wbn-ckpts --port 6007
```
in current directory

In [13]:
for i in range(5): 
    wbn.train(inp_fn)
    wbn.evaluate(test_inp_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into wbn-ckpts\model.ckpt.
INFO:tensorflow:loss = 2.9929466, step = 0
INFO:tensorflow:Saving checkpoints for 20 into wbn-ckpts\model.ckpt.
INFO:tensorflow:Loss for final step: 1.2838881.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-05-26-11:29:34
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from wbn-ckpts\model.ckpt-20
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-05-26-11:29:38
INFO:tensorflow:Saving dict for global step 20: accuracy = 0.1066, global_step = 20, loss = 2.7712424
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done c

## Results
#### Without BN
![Without BN](wobn.png)
#### With BN
![With BN](wbn.png)

As we can see, the loss converges much faster in the version using Batch Normalization. Version without BN takes about 350 steps, whereas version with BN does it in just 80! This is due to the reduction of internal covariant shift for each layer as done by BN.

#### Overfitting
Here, we can see that the eval losses goes up during training - an obvious indication of overfitting. For now, we have concentrated only to know how the BN fastens up the regular networks. So, I didn't use any regularization.

BN also provides a weak form of regularization. But as we can see, it cannot prevent overfitting. It should used with any other regularization layers used normally.

Read more about Batch Normalization in the [original paper](https://arxiv.org/abs/1502.03167). You can try running this notebook in your local system or in [Google Colab](https://colab.research.google.com)