In [4]:
import torch
import torch.nn as nn

In [60]:
# NLP Example
torch.manual_seed(0)
batch, sentence_length, embedding_dim = 2, 3, 4
embedding = torch.randn(batch, sentence_length, embedding_dim)
print(embedding)
# Normalize over the last (embedding) dimension. The size comes from
# embedding_dim rather than a hard-coded 4 so the layer always matches the
# tensor built above.
# eps=0 and elementwise_affine=False reduce the layer to a plain
# (x - mean) / std using the biased standard deviation; note that with eps=0
# a constant embedding vector would divide by zero.
layer_norm = nn.LayerNorm(embedding_dim, eps=0, elementwise_affine=False)
# Activate module
layer_norm(embedding)


tensor([[[-1.1258, -1.1524, -0.2506, -0.4339],
         [ 0.8487,  0.6920, -0.3160, -2.1152],
         [ 0.4681, -0.1577,  1.4437,  0.2660]],

        [[ 0.1665,  0.8744, -0.1435, -0.1116],
         [ 0.9318,  1.2590,  2.0050,  0.0537],
         [ 0.6181, -0.4128, -0.8411, -2.3160]]])


tensor([[[-0.9539, -1.0196,  1.2137,  0.7598],
         [ 0.9075,  0.7747, -0.0791, -1.6031],
         [-0.0629, -1.1288,  1.5988, -0.4070]],

        [[-0.0732,  1.6553, -0.8300, -0.7522],
         [-0.1864,  0.2808,  1.3460, -1.4404],
         [ 1.2863,  0.3084, -0.0978, -1.4969]]])

In [61]:
# Seeded embedding plus its per-token statistics, computed by hand
torch.manual_seed(0)
batch, sentence_length, embedding_dim = 2, 3, 4
embedding = torch.randn(batch, sentence_length, embedding_dim)
print(embedding)
# Mean over the last (embedding) dimension; keepdim=True keeps a size-1 axis
# so the result broadcasts against `embedding`.
mean = torch.mean(embedding, dim=-1, keepdim=True)
print(mean)
# Biased (population) standard deviation over the same dimension
std = torch.std(embedding, dim=-1, unbiased=False, keepdim=True)

tensor([[[-1.1258, -1.1524, -0.2506, -0.4339],
         [ 0.8487,  0.6920, -0.3160, -2.1152],
         [ 0.4681, -0.1577,  1.4437,  0.2660]],

        [[ 0.1665,  0.8744, -0.1435, -0.1116],
         [ 0.9318,  1.2590,  2.0050,  0.0537],
         [ 0.6181, -0.4128, -0.8411, -2.3160]]])
tensor([[[-0.7407],
         [-0.2226],
         [ 0.5050]],

        [[ 0.1964],
         [ 1.0624],
         [-0.7380]]])


In [59]:
# Manual normalization: center each embedding vector, then scale by its std
(embedding - mean).div(std)

tensor([[[-0.9539, -1.0196,  1.2137,  0.7598],
         [ 0.9075,  0.7747, -0.0791, -1.6031],
         [-0.0629, -1.1288,  1.5988, -0.4070]],

        [[-0.0732,  1.6553, -0.8300, -0.7522],
         [-0.1864,  0.2808,  1.3460, -1.4404],
         [ 1.2863,  0.3084, -0.0978, -1.4969]]])

In [62]:
## PyTorch Official Examples

In [67]:
# NLP Example
batch, sentence_length, embedding_dim = 2, 5, 3
embedding = torch.randn(batch, sentence_length, embedding_dim)
# Layer norm over the last (embedding) dimension, with the default
# learnable elementwise affine parameters.
layer_norm = nn.LayerNorm(normalized_shape=embedding_dim)
# Apply the module to the batch of embeddings
layer_norm(embedding)

tensor([[[ 0.8230, -1.4074,  0.5844],
         [-0.8869, -0.5104,  1.3973],
         [ 1.3689, -0.9919, -0.3771],
         [-0.6800, -0.7335,  1.4135],
         [ 1.1326, -1.2726,  0.1401]],

        [[ 0.4724,  0.9181, -1.3906],
         [-0.1572,  1.2958, -1.1385],
         [ 1.4088, -0.8118, -0.5970],
         [ 1.4140, -0.7282, -0.6858],
         [-0.0269, -1.2111,  1.2380]]], grad_fn=<NativeLayerNormBackward0>)

In [81]:
# Image Example
torch.manual_seed(1)
N, C, H, W = 2, 3, 2, 3
image = torch.randn(N, C, H, W)
# Each sample is normalized jointly over its last three dimensions
# (the channel and spatial dimensions); no learnable scale/shift is used.
layer_norm = nn.LayerNorm(normalized_shape=(C, H, W), elementwise_affine=False)
output = layer_norm(image)
output

tensor([[[[-1.2777, -0.3132, -0.1935],
          [-1.3820,  0.4954, -0.1378]],

         [[-0.5987, -1.3815, -0.2658],
          [ 0.9978, -0.3469,  0.3072]],

         [[ 0.3435,  2.7185,  0.9041],
          [ 1.2016, -0.2470, -0.8239]]],


        [[[ 0.2966, -0.1406, -0.1407],
          [-0.2746, -0.1809, -0.7302]],

         [[-1.9590, -0.9930,  1.4539],
          [-0.3902,  1.7448, -1.2209]],

         [[ 1.0613, -0.8649,  1.7884],
          [-0.0712, -0.0990,  0.7202]]]])

In [82]:
# Without Learnable Parameters: affine=False disables the learnable
# per-channel scale and shift, so this is pure batch normalization.
# num_features is read from the channel axis of `image` (built as
# (N, C, H, W) in the image example cell) instead of a hard-coded 3.
m = nn.BatchNorm2d(image.size(1), affine=False)
output = m(image)
output

tensor([[[[-1.7121, -0.5643, -0.4219],
          [-1.8363,  0.3980, -0.3556]],

         [[-0.5558, -1.0596, -0.3415],
          [ 0.4718, -0.3937,  0.0273]],

         [[-0.6478,  1.2001, -0.2116],
          [ 0.0199, -1.1072, -1.5560]]],


        [[[ 1.5818,  0.8409,  0.8409],
          [ 0.6139,  0.7727, -0.1579]],

         [[-1.2783, -0.3931,  1.8493],
          [ 0.1594,  2.1159, -0.6019]],

         [[ 1.0914, -1.0422,  1.8967],
          [-0.1631, -0.1939,  0.7135]]]])

In [84]:
# Layer-norm statistics: one mean/std per sample, reduced over the channel
# and spatial axes (dims 1, 2, 3); std is the biased (population) estimate.
mean_layer_norm = torch.mean(image, dim=(1, 2, 3), keepdim=True)
std_layer_norm = torch.std(image, dim=(1, 2, 3), unbiased=False, keepdim=True)
mean_layer_norm

tensor([[[[-0.4984]]],


        [[[ 0.3601]]]])

In [83]:
# Manual layer norm: center and scale every sample with its own statistics
(image - mean_layer_norm).div(std_layer_norm)

tensor([[[[-1.2777, -0.3132, -0.1935],
          [-1.3820,  0.4954, -0.1378]],

         [[-0.5987, -1.3815, -0.2658],
          [ 0.9978, -0.3469,  0.3072]],

         [[ 0.3435,  2.7185,  0.9041],
          [ 1.2017, -0.2470, -0.8239]]],


        [[[ 0.2966, -0.1406, -0.1407],
          [-0.2746, -0.1809, -0.7302]],

         [[-1.9590, -0.9930,  1.4540],
          [-0.3902,  1.7448, -1.2209]],

         [[ 1.0613, -0.8649,  1.7884],
          [-0.0712, -0.0990,  0.7202]]]])

In [85]:
# Batch-norm statistics: one mean/std per channel, reduced over the batch
# and spatial axes (dims 0, 2, 3); std is the biased (population) estimate.
mean_batch_norm = torch.mean(image, dim=(0, 2, 3), keepdim=True)
std_batch_norm = torch.std(image, dim=(0, 2, 3), unbiased=False, keepdim=True)
mean_batch_norm

tensor([[[[-0.3690]],

         [[-0.2856]],

         [[ 0.4471]]]])

In [86]:
# Manual batch norm: center and scale every channel with its batch-wide statistics
(image - mean_batch_norm).div(std_batch_norm)

tensor([[[[-1.7121, -0.5644, -0.4219],
          [-1.8363,  0.3980, -0.3556]],

         [[-0.5558, -1.0597, -0.3415],
          [ 0.4718, -0.3937,  0.0273]],

         [[-0.6478,  1.2001, -0.2116],
          [ 0.0199, -1.1072, -1.5560]]],


        [[[ 1.5818,  0.8409,  0.8409],
          [ 0.6139,  0.7727, -0.1579]],

         [[-1.2783, -0.3931,  1.8494],
          [ 0.1594,  2.1159, -0.6019]],

         [[ 1.0914, -1.0422,  1.8967],
          [-0.1631, -0.1939,  0.7135]]]])