[![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/itmorn/AI.handbook/blob/main/DL/torch/nn/Normalization/LayerNorm.ipynb)

# LayerNorm
在4D输入(NCHW)上应用Layer Normalization，论文参考[Layer Normalization](https://arxiv.org/abs/1607.06450)

**定义**：  
torch.nn.LayerNorm(normalized_shape, eps=1e-05, elementwise_affine=True, device=None, dtype=None)

**公式**：  
$$y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta$$

和BN类似，参数可参考BN的讲解

# 图解train模式下的前向传播过程

<p align="center">
<a href="https://raw.githubusercontent.com/itmorn/AI.handbook/main/DL/torch/nn/Normalization/imgs/LayerNorm.svg">
<img src="./imgs/LayerNorm.svg"
    width="2000" /></a></p>


In [1]:
# 调包计算
import torch
import torch.nn as nn

input1 = torch.tensor([
    [
        [[1, 6],
         [9, 4]],
        [[12, 18],
         [13, 11]]],
    [
        [[2, 7],
         [3, 8]],
        [[19, 17],
         [15, 11]]
    ]
], dtype=torch.float32)
print("input1:\n", input1,"\n")

N, C, H, W = 2, 2, 2, 2

m = nn.LayerNorm(normalized_shape=[C, H, W], eps=1e-05, elementwise_affine=True)

output = m(input1)
print("output:\n", output,"\n") # 结果和手工计算一致

input1:
 tensor([[[[ 1.,  6.],
          [ 9.,  4.]],

         [[12., 18.],
          [13., 11.]]],


        [[[ 2.,  7.],
          [ 3.,  8.]],

         [[19., 17.],
          [15., 11.]]]]) 

output:
 tensor([[[[-1.6199, -0.6381],
          [-0.0491, -1.0308]],

         [[ 0.5400,  1.7181],
          [ 0.7363,  0.3436]]],


        [[[-1.3908, -0.5479],
          [-1.2222, -0.3793]],

         [[ 1.4751,  1.1379],
          [ 0.8008,  0.1264]]]], grad_fn=<NativeLayerNormBackward0>) 



In [8]:
# 手工计算
import torch

# 这里涉及到组的计算，编程实现比较麻烦，展示起来不直观，这里就手工计算第1组，证明正确性即可。
input1 = torch.tensor([
    [
        [[1, 6],
         [9, 4]],
        [[12, 18],
         [13, 11]]],
    [
        [[2, 7],
         [3, 8]],
        [[19, 17],
         [15, 11]]
    ]
], dtype=torch.float32)

print("input1:\n", input1, "\n")

# 第1步：求均值和方差：
VarX, EX = torch.var_mean(input1, dim=(1,2,3),unbiased=False)  # NCHW
print("Ex:\n", EX, "\n")
print("VarX:\n", VarX, "\n")

# 第2步：减去均值：
result2 = input1-EX
print("input1-Ex:\n", result2, "\n")

# 第3步：求sqrt(VarX+eps)：
eps = 1e-5
result3 = torch.sqrt(VarX+eps)
print("sqrt(VarX+eps):\n", result3, "\n")

# 第4步：第2步的结果/第3步的结果，完成batch内的数据规范化:
result4 = result2/result3
print("(input1-Ex)/sqrt(VarX+eps):\n", result4, "\n")

# 第5步：使用γ=1，β=0 进行再校正：
γ = 1
β = 0
result5 = result4 * γ + β
print("[(input1-Ex)/sqrt(VarX+eps)] * γ + β:\n", result5, "\n") # 结果和图上一致


input1:
 tensor([[[[ 1.,  6.],
          [ 9.,  4.]],

         [[12., 18.],
          [13., 11.]]],


        [[[ 2.,  7.],
          [ 3.,  8.]],

         [[19., 17.],
          [15., 11.]]]]) 

Ex:
 tensor([ 9.2500, 10.2500]) 

VarX:
 tensor([25.9375, 35.1875]) 

input1-Ex:
 tensor([[[[-8.2500, -4.2500],
          [-0.2500, -6.2500]],

         [[ 2.7500,  7.7500],
          [ 3.7500,  0.7500]]],


        [[[-7.2500, -3.2500],
          [-6.2500, -2.2500]],

         [[ 9.7500,  6.7500],
          [ 5.7500,  0.7500]]]]) 

sqrt(VarX+eps):
 tensor([5.0929, 5.9319]) 

(input1-Ex)/sqrt(VarX+eps):
 tensor([[[[-1.6199, -0.7165],
          [-0.0491, -1.0536]],

         [[ 0.5400,  1.3065],
          [ 0.7363,  0.1264]]],


        [[[-1.4236, -0.5479],
          [-1.2272, -0.3793]],

         [[ 1.9144,  1.1379],
          [ 1.1290,  0.1264]]]]) 

