In [1]:
import torch
import torch.nn as nn

In [2]:
# Pin the global RNG so both the random input and the layer's initial
# weights come out the same on every run.
torch.manual_seed(66)

# Toy batch: 2 samples x 5 features. Drawn *before* the layer is built so
# the random stream is consumed in the same order every time.
batch_example = torch.randn((2, 5))

# One linear projection (5 -> 6 features) followed by a ReLU.
layer = nn.Sequential(
    nn.Linear(in_features=5, out_features=6),
    nn.ReLU(),
)

# Activations for the whole batch, shape (2, 6).
out = layer(batch_example)

In [3]:
# Per-sample statistics taken over the last (feature) dimension.
# keepdim=True keeps that dimension with size 1, so the results
# broadcast against `out` in later arithmetic.
mean = torch.mean(out, dim=-1, keepdim=True)
var = torch.var(out, dim=-1, keepdim=True)

In [4]:
# Print the raw activations, then their per-row mean and variance.
# Each entry is (heading, tensor, line terminator passed to print).
for label, value, line_end in (
    ("Layer Outputs:\n\n", out, "\n\n"),
    ("Mean:\n", mean, "\n\n"),
    ("Variance:\n", var, "\n"),
):
    print(label, value, end=line_end)

Layer Outputs:

 tensor([[0.0000, 0.7322, 0.3517, 0.0989, 0.0000, 0.9316],
        [0.0000, 0.6016, 0.0000, 0.0000, 0.0564, 0.5586]],
       grad_fn=<ReluBackward0>)

Mean:
 tensor([[0.3524],
        [0.2028]], grad_fn=<MeanBackward1>)

Variance:
 tensor([[0.1585],
        [0.0861]], grad_fn=<VarBackward0>)


In [5]:
# Layer Normalization
#
# Standardize every sample across its last (feature) dimension: subtract the
# per-row mean and divide by the per-row standard deviation.
#
# The statistics of `out` are computed here under their own names instead of
# being read from the global `mean` / `var`. This cell rebinds those two names
# further down, so reading them would make a second run of the cell normalize
# `out` with the statistics of the already-normalized tensor.
out_mean = out.mean(dim=-1, keepdim=True)
out_var = out.var(dim=-1, keepdim=True)

# NOTE: no epsilon is added under the square root, so a row whose activations
# are all equal (variance 0) would divide by zero. `Tensor.var` is also left
# at its default (unbiased) estimator, unlike `nn.LayerNorm`, which uses the
# biased one.
out_norm = (out - out_mean) / torch.sqrt(out_var)

# Re-measure on the normalized activations; the following cells print these.
mean = out_norm.mean(dim=-1, keepdim=True)
var = out_norm.var(dim=-1, keepdim=True)

In [6]:
# Print the normalized activations, then their per-row mean and variance.
# Each entry is (heading, tensor, line terminator passed to print).
for label, value, line_end in (
    ("Normalized Layer Outputs:\n\n", out_norm, "\n\n"),
    ("Mean:\n", mean, "\n\n"),
    ("Variance:\n", var, "\n"),
):
    print(label, value, end=line_end)

Normalized Layer Outputs:

 tensor([[-0.8853,  0.9540, -0.0017, -0.6367, -0.8853,  1.4550],
        [-0.6911,  1.3594, -0.6911, -0.6911, -0.4989,  1.2127]],
       grad_fn=<DivBackward0>)

Mean:
 tensor([[ 0.0000e+00],
        [-3.9736e-08]], grad_fn=<MeanBackward1>)

Variance:
 tensor([[1.0000],
        [1.0000]], grad_fn=<VarBackward0>)


In [7]:
# Switch tensor printing away from scientific notation. This is a global
# torch print option, so it applies to every tensor printed from here on.
torch.set_printoptions(sci_mode=False)

# Print the same statistics again under the new print options.
for label, value, line_end in (
    ("Mean:\n", mean, "\n\n"),
    ("Variance:\n", var, "\n"),
):
    print(label, value, end=line_end)

Mean:
 tensor([[     0.0000],
        [    -0.0000]], grad_fn=<MeanBackward1>)

Variance:
 tensor([[1.0000],
        [1.0000]], grad_fn=<VarBackward0>)


<div align="center">
  <img src="attachment:6b90424c-3c1b-48a4-a8f5-00db68ab68dd.png" alt="image" width="60%">
</div>