In [7]:
import torch
import torch.nn as nn
import numpy as np

# Batch Norm

In [9]:
# Value of 1/sqrt(1 + eps) for eps = 1e-5, printed with a positive and then a
# negative sign. Computed once and reused so both lines share the same factor.
inv_std = 1 / np.sqrt(1 + 1e-5)
print(inv_std)
print(-inv_std)

0.9999950000374997
-0.9999950000374997


In [25]:
# Toy input of shape (2, 1, 2, 2). nn.BatchNorm2d treats dim 1 as the channel
# axis, so this is 2 samples with a single channel and a 2x2 spatial grid.
x = torch.tensor([
    [[
        [3., 4.],
        [5., 6.],
    ]],
    [[
        [1., 2.],
        [3., 4.],
    ]],
])
print(x)
print(x.shape)
print('='*50)

# Keep a handle on the layer so its epsilon can be reused below instead of
# repeating the literal. A freshly built module is in training mode and its
# affine parameters start at weight=1, bias=0, so the forward pass is a pure
# normalization with the statistics of this batch.
batch_norm = nn.BatchNorm2d(x.shape[1])
out = batch_norm(x)
print(out)
print(out.shape)
print('='*50)

# Manual reproduction of the layer output.
# - BatchNorm2d normalizes each channel over the batch and spatial axes. With a
#   single channel that is the same as reducing over the whole tensor, which is
#   why a plain torch.mean / torch.var is enough here; for several channels the
#   reduction would have to be done per channel.
# - torch.var applies Bessel's correction by default; batch norm normalizes
#   with the biased (population) variance, hence unbiased=False.
mean = torch.mean(x)
var = torch.var(x, unbiased=False)
print(mean)
print(var)
manual = (x - mean) / torch.sqrt(var + batch_norm.eps)
print(manual)

# Guard the claim made by this cell: the hand-computed result matches the layer.
assert torch.allclose(out, manual, atol=1e-6)

tensor([[[[3., 4.],
          [5., 6.]]],


        [[[1., 2.],
          [3., 4.]]]])
torch.Size([2, 1, 2, 2])
tensor([[[[-0.3333,  0.3333],
          [ 1.0000,  1.6667]]],


        [[[-1.6667, -1.0000],
          [-0.3333,  0.3333]]]], grad_fn=<NativeBatchNormBackward0>)
torch.Size([2, 1, 2, 2])
tensor(3.5000)
tensor(2.2500)
tensor([[[[-0.3333,  0.3333],
          [ 1.0000,  1.6667]]],


        [[[-1.6667, -1.0000],
          [-0.3333,  0.3333]]]])


# Layer Norm

In [45]:
# Toy input of shape (1, 2, 3): one sample holding two rows of 3 features.
x = torch.tensor([[
    [1., 2., 3.],
    [4., 5., 7.],
]])
print(x)
print(x.shape)
print('='*50)

# normalized_shape is the size of the last dimension, so every length-3 row is
# normalized on its own. The layer is kept in a variable so its epsilon can be
# reused below instead of repeating the literal.
layer_norm = nn.LayerNorm(x.shape[-1])
out = layer_norm(x)
print(out)
print(out.shape)
print('='*50)

# Manual reproduction: unlike batch norm, the statistics are taken per feature
# vector (last dimension), independently of the other rows and samples.
# Flattening the leading dimensions visits every row, whatever the batch size.
# torch.var applies Bessel's correction by default; layer norm uses the biased
# (population) variance, hence unbiased=False.
manual_rows = []
for row in x.reshape(-1, x.shape[-1]):
    mean = torch.mean(row)
    var = torch.var(row, unbiased=False)
    normalized = (row - mean) / torch.sqrt(var + layer_norm.eps)
    manual_rows.append(normalized)
    print(normalized)

# Guard the claim made by this cell: the hand-computed rows match the layer.
assert torch.allclose(out, torch.stack(manual_rows).reshape(x.shape), atol=1e-6)

tensor([[[1., 2., 3.],
         [4., 5., 7.]]])
torch.Size([1, 2, 3])
tensor([[[-1.2247,  0.0000,  1.2247],
         [-1.0690, -0.2673,  1.3363]]], grad_fn=<NativeLayerNormBackward0>)
torch.Size([1, 2, 3])
tensor([-1.2247,  0.0000,  1.2247])
tensor([-1.0690, -0.2673,  1.3363])
