## Batchnorm and Dropout with PyTorch

See the [`torch.nn.BatchNorm2d` documentation](https://pytorch.org/docs/stable/generated/torch.nn.BatchNorm2d.html) for the full reference on PyTorch's 2D batch normalization layer.

See the [`torch.nn.Dropout` documentation](https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html) for the full reference on PyTorch's Dropout layer.

### Batchnorm

In [1]:
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn

In [2]:
import gzip
import pickle

# Read the gzipped, pickled MNIST splits. The third split is bound to `_`
# and not used in this notebook.
# NOTE: pickle.load can execute arbitrary code; only open files you trust.
with gzip.open('mnist.pkl.gz', 'rb') as f:
    train_split, valid_split, _ = pickle.load(f, encoding='latin-1')

# Convert every array to a torch tensor.
x_train, y_train = (torch.tensor(arr) for arr in train_split)
x_valid, y_valid = (torch.tensor(arr) for arr in valid_split)

# Give the images the (batch, channels, height, width) layout expected by
# 2D layers; -1 lets PyTorch infer the batch dimension.
image_shape = (-1, 1, 28, 28)
x_train = x_train.reshape(image_shape)
x_valid = x_valid.reshape(image_shape)

In [3]:
# Check that the training images now have the (batch, channels, height, width) layout.
x_train.shape

torch.Size([50000, 1, 28, 28])

In [102]:
# Take the first two training images as a tiny batch to experiment with.
xb = x_train[:2]
xb.size()

torch.Size([2, 1, 28, 28])

In [123]:
# num_features is the number of channels of the input (1 for these
# single-channel images).
my_batchnorm = nn.BatchNorm2d(num_features=1)

# Show each learnable parameter: its shape first, then its value.
for p in my_batchnorm.parameters():
    print(p.shape, p, sep='\n')

torch.Size([1])
Parameter containing:
tensor([1.], requires_grad=True)
torch.Size([1])
Parameter containing:
tensor([0.], requires_grad=True)


In [124]:
# Forward pass through the layer. Nothing has switched it to eval mode, so
# the normalization uses this batch's own statistics. The output keeps the
# input's shape.
out = my_batchnorm(xb)
print(out.size())

torch.Size([2, 1, 28, 28])


In [125]:
# Display the normalized values. The output carries a grad_fn because the
# layer's weight and bias require gradients.
out

tensor([[[[-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569],
          [-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569],
          [-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569],
          ...,
          [-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569],
          [-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569],
          [-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569]]],


        [[[-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569],
          [-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569],
          [-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569],
          ...,
          [-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569],
          [-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569],
          [-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569]]]],
       grad_fn=<NativeBatchNormBackward0>)

In [132]:
# Reproduce the layer's output by hand: per-channel statistics are taken over
# the batch and both spatial axes, the variance is the biased estimate, and
# 1e-5 is BatchNorm2d's default eps. With weight 1 and bias 0 no further
# affine step is needed.
stat_dims = (0, 2, 3)
batch_mean = xb.mean(dim=stat_dims, keepdims=True)
batch_var = xb.var(dim=stat_dims, unbiased=False, keepdims=True)
(xb - batch_mean) / torch.sqrt(batch_var + 1e-5)

tensor([[[[-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569],
          [-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569],
          [-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569],
          ...,
          [-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569],
          [-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569],
          [-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569]]],


        [[[-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569],
          [-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569],
          [-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569],
          ...,
          [-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569],
          [-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569],
          [-0.4569, -0.4569, -0.4569,  ..., -0.4569, -0.4569, -0.4569]]]])

### Dropout

In [160]:
# In training mode (the state of a newly created module) Dropout zeroes each
# element with probability p and multiplies the survivors by 1/(1-p).
my_dropout = nn.Dropout(p=0.5)
x = torch.randn(1, 5)

# my_dropout.eval()  # uncomment to switch to eval mode, where dropout is a no-op
out = my_dropout(x)

# Print the input and the dropped-out result one after the other.
print(x, out, sep='\n')

tensor([[-1.1562, -0.2661,  1.4779,  0.4316,  0.1407]])
tensor([[-0.0000, -0.0000, 2.9557, 0.8633, 0.2814]])
