<a href="https://colab.research.google.com/github/ITAM-DS/deep-learning/blob/master/codigo/08-VGG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# !pip install -U d2l
from d2l import tensorflow as d2l
import tensorflow as tf
import numpy as np

## Aplicación de Batch-Normalization al modelo LeNet

In [6]:
def net():
    """Build a LeNet-style Keras model with batch normalization.

    Every convolutional and hidden dense layer is followed by a
    BatchNormalization layer and then a sigmoid activation; the two
    convolutional stages additionally end in 2x2 max pooling with stride 2.
    The first layer declares an input of shape (28, 28, 1) and the last
    layer is a Dense layer with 10 units and no activation.

    Returns:
        A new, untrained ``tf.keras.models.Sequential`` instance. Each call
        creates fresh layers.
    """
    layers = tf.keras.layers

    def norm_then_sigmoid():
        # Normalization is placed before the non-linearity.
        return [layers.BatchNormalization(), layers.Activation('sigmoid')]

    def halving_pool():
        return layers.MaxPool2D(pool_size=2, strides=2)

    stack = [
        # First convolutional stage.
        layers.Conv2D(filters=6, kernel_size=5, input_shape=(28, 28, 1)),
        *norm_then_sigmoid(),
        halving_pool(),
        # Second convolutional stage.
        layers.Conv2D(filters=16, kernel_size=5),
        *norm_then_sigmoid(),
        halving_pool(),
        # Dense head.
        layers.Flatten(),
        layers.Dense(120),
        *norm_then_sigmoid(),
        layers.Dense(84),
        *norm_then_sigmoid(),
        layers.Dense(10),
    ]
    return tf.keras.models.Sequential(stack)

In [12]:
# Push one random (1, 28, 28, 1) tensor through a freshly built network,
# one layer at a time, printing the shape each layer produces.
X = tf.random.uniform((1, 28, 28, 1))
for layer in net().layers:
    X = layer(X)
    layer_name = type(layer).__name__
    print(layer_name, 'output shape:\t', X.shape)


Conv2D output shape:	 (1, 24, 24, 6)
BatchNormalization output shape:	 (1, 24, 24, 6)
Activation output shape:	 (1, 24, 24, 6)
MaxPooling2D output shape:	 (1, 12, 12, 6)
Conv2D output shape:	 (1, 8, 8, 16)
BatchNormalization output shape:	 (1, 8, 8, 16)
Activation output shape:	 (1, 8, 8, 16)
MaxPooling2D output shape:	 (1, 4, 4, 16)
Flatten output shape:	 (1, 256)
Dense output shape:	 (1, 120)
BatchNormalization output shape:	 (1, 120)
Activation output shape:	 (1, 120)
Dense output shape:	 (1, 84)
BatchNormalization output shape:	 (1, 84)
Activation output shape:	 (1, 84)
Dense output shape:	 (1, 10)


In [9]:
# Walk one random (1, 28, 28, 1) tensor through a freshly built network and
# report, for every layer, its parameter count and output shape, followed by
# the total parameter count of the model.
X = tf.random.uniform((1, 28, 28, 1))
parameters = 0

for layer in net().layers:
    X = layer(X)  # calling the layer builds it, so its weights exist below
    # count_params() returns a plain int (0 for weightless layers). Summing
    # np.prod over get_weights() instead gives the float 0.0 for an empty
    # weight list, which would print "0.0" and turn the total into a float.
    layer_parameters = layer.count_params()
    print(layer.__class__.__name__, "Parameters: ",
          layer_parameters,
          '\t. Output shape:\t', X.shape)
    parameters += layer_parameters

print("--"*30)
print("Total parameters: ", parameters)


Conv2D Parameters:  156 	. Output shape:	 (1, 24, 24, 6)
BatchNormalization Parameters:  24 	. Output shape:	 (1, 24, 24, 6)
Activation Parameters:  0.0 	. Output shape:	 (1, 24, 24, 6)
MaxPooling2D Parameters:  0.0 	. Output shape:	 (1, 12, 12, 6)
Conv2D Parameters:  2416 	. Output shape:	 (1, 8, 8, 16)
BatchNormalization Parameters:  64 	. Output shape:	 (1, 8, 8, 16)
Activation Parameters:  0.0 	. Output shape:	 (1, 8, 8, 16)
MaxPooling2D Parameters:  0.0 	. Output shape:	 (1, 4, 4, 16)
Flatten Parameters:  0.0 	. Output shape:	 (1, 256)
Dense Parameters:  30840 	. Output shape:	 (1, 120)
BatchNormalization Parameters:  480 	. Output shape:	 (1, 120)
Activation Parameters:  0.0 	. Output shape:	 (1, 120)
Dense Parameters:  10164 	. Output shape:	 (1, 84)
BatchNormalization Parameters:  336 	. Output shape:	 (1, 84)
Activation Parameters:  0.0 	. Output shape:	 (1, 84)
Dense Parameters:  850 	. Output shape:	 (1, 10)
------------------------------------------------------------
Total 

In [9]:
# Hyperparameters for the training run.
lr = 0.1
num_epochs = 10
batch_size = 128

# Fashion-MNIST train/test iterators from the d2l helper.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=28)

# `net` is passed as a function, not as a built model.
# NOTE(review): presumably train_ch6 calls it to construct the model itself;
# confirm against the installed d2l version.
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr)

## Ejercicios

Tomados del [libro de texto](https://d2l.ai/chapter_convolutional-modern/batch-norm.html):

1. Can we remove the bias parameter from the fully-connected layer or the convolutional layer before the batch normalization? Why?
2. Compare the learning rates for LeNet with and without batch normalization.
    - Plot the increase in training and test accuracy.
    - How large can you make the learning rate?
3. Do we need batch normalization in every layer? Experiment with it.
4. Can you replace dropout by batch normalization? How does the behavior change?
5. Fix the parameters beta and gamma, and observe and analyze the results.
6. Review the online documentation for BatchNorm from the high-level APIs to see the other applications for batch normalization.
