## LayerNorm

In [12]:
# NLP example: LayerNorm normalizes each token's embedding vector on its own.
import torch
from torch import nn

batch, sentence_len, embedding_dim = 2, 3, 4
x = torch.randn(batch, sentence_len, embedding_dim)  # [B, S, E]

# normalized_shape=embedding_dim -> statistics are taken over the last axis only.
layer_norm = nn.LayerNorm(embedding_dim)
output = layer_norm(x)
print("x: ", x)
print("output: ", output)

# Learnable per-feature scale; it prints as all ones for a freshly built module.
print(layer_norm.weight)

# Manual re-computation: per-token mean and *biased* variance over the
# embedding axis. The epsilon is read from the module instead of being
# hard-coded, so the two paths stay in sync if eps is ever changed.
mean = torch.mean(x, dim=-1, keepdim=True)
std = torch.sqrt(layer_norm.eps + torch.var(x, dim=-1, unbiased=False, keepdim=True))

print("mean shape: ", mean.shape)

# The affine step (weight * x_hat + bias) is skipped here; that is only valid
# while the module still holds its initial weight=1 / bias=0 parameters.
my_out = (x - mean) / std

print(my_out)

# Verify the equivalence numerically rather than by comparing printed tensors.
assert torch.allclose(my_out, output, atol=1e-5), "manual LayerNorm disagrees with nn.LayerNorm"


x:  tensor([[[-0.5098,  0.5693, -0.1319, -0.0561],
         [ 1.1066,  0.0233,  0.2925, -0.1695],
         [-1.6515, -0.0616, -1.6214, -0.8051]],

        [[-0.6249, -0.6756, -0.3612, -1.4008],
         [ 1.6539, -0.4520,  0.2450,  0.2596],
         [-1.6193,  0.1253, -0.2407, -0.3737]]])
output:  tensor([[[-1.2329,  1.5523, -0.2575, -0.0618],
         [ 1.6306, -0.5959, -0.0426, -0.9921],
         [-0.9392,  1.4824, -0.8932,  0.3500]],

        [[ 0.3649,  0.2335,  1.0486, -1.6470],
         [ 1.6049, -1.1490, -0.2375, -0.2184],
         [-1.6636,  0.9937,  0.4362,  0.2337]]],
       grad_fn=<NativeLayerNormBackward0>)
Parameter containing:
tensor([1., 1., 1., 1.], requires_grad=True)
mean shape:  torch.Size([2, 3, 1])
tensor([[[-1.2329,  1.5523, -0.2575, -0.0618],
         [ 1.6306, -0.5959, -0.0426, -0.9921],
         [-0.9392,  1.4824, -0.8932,  0.3500]],

        [[ 0.3649,  0.2335,  1.0486, -1.6470],
         [ 1.6049, -1.1490, -0.2375, -0.2184],
         [-1.6636,  0.9937,  0.43

In [19]:
# Image example: LayerNorm over (C, H, W) normalizes each sample as a whole.
N, C, H, W = 2, 3, 2, 2
x = torch.randn(N, C, H, W)

# normalized_shape=[C, H, W] -> one mean/variance per sample, and one learnable
# weight/bias entry per (c, h, w) position.
layer_norm = nn.LayerNorm([C, H, W])
output = layer_norm(x)

print("output: ", output)

print(layer_norm.weight.shape)

# Each sample's normalized values should average to ~0 (up to float error).
print(torch.mean(output, dim=(1, 2, 3)))

# Manual re-computation: reduce over every non-batch axis at once. keepdim=True
# yields [N, 1, 1, 1] statistics that broadcast against x directly, so no
# flatten / reshape round-trip is needed.
mean = x.mean(dim=(1, 2, 3), keepdim=True)
var = x.var(dim=(1, 2, 3), keepdim=True, unbiased=False)

# Use the module's own epsilon instead of a hard-coded copy. The affine step is
# skipped, which is only valid while weight=1 / bias=0 (their initial values).
output_manual = (x - mean) / torch.sqrt(var + layer_norm.eps)

print("output manual:", output_manual)

# Verify the equivalence numerically rather than by comparing printed tensors.
assert torch.allclose(output_manual, output, atol=1e-5), "manual LayerNorm disagrees with nn.LayerNorm"

output:  tensor([[[[ 0.0177,  0.5649],
          [-0.1009,  0.4672]],

         [[ 1.0371,  0.0933],
          [-1.4276, -1.1385]],

         [[ 0.6064, -2.1468],
          [ 0.9841,  1.0431]]],


        [[[ 0.0613,  0.1035],
          [ 0.2043,  0.8139]],

         [[-0.6403,  0.1091],
          [-0.4848,  1.7699]],

         [[-0.9012,  1.6726],
          [-1.6817, -1.0267]]]], grad_fn=<NativeLayerNormBackward0>)
torch.Size([3, 2, 2])
tensor([-2.9802e-08,  0.0000e+00], grad_fn=<MeanBackward1>)
output manual: tensor([[[[ 0.0177,  0.5649],
          [-0.1009,  0.4672]],

         [[ 1.0371,  0.0933],
          [-1.4276, -1.1385]],

         [[ 0.6064, -2.1468],
          [ 0.9841,  1.0431]]],


        [[[ 0.0613,  0.1035],
          [ 0.2043,  0.8139]],

         [[-0.6403,  0.1091],
          [-0.4848,  1.7699]],

         [[-0.9012,  1.6726],
          [-1.6817, -1.0267]]]])
