[![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/itmorn/AI.handbook/blob/main/DL/torch/nn/Normalization/GroupNorm.ipynb)

# GroupNorm
对小批输入应用组归一化，参考：[Group Normalization](https://arxiv.org/abs/1803.08494)

**定义**：  
torch.nn.GroupNorm(num_groups, num_channels, eps=1e-05, affine=True, device=None, dtype=None)

**公式**：  
$$y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta$$


**参数**:  
和BatchNorm类似

- num_groups (int) – number of groups to separate the channels into.  把通道分成多少组（num_channels 必须能被 num_groups 整除）。

- num_channels (int) – number of channels expected in input.  通道数是多少。
- eps (float) – a value added to the denominator for numerical stability. Default: 1e-5.  为数值稳定性添加到分母的值。默认值:1e-5
- affine (bool) – a boolean value that when set to True, this module has learnable per-channel affine parameters initialized to ones (for weights) and zeros (for biases). Default: True.  一个布尔值，当设置为True时，该模块具有可学习的逐通道仿射参数(权重 $\gamma$ 初始化为全一向量，偏置 $\beta$ 初始化为全零向量)。默认值:True

# 图解前向传播过程（GroupNorm 不维护 running 统计量，train 和 eval 模式下的计算相同）

<p align="center">
<a href="https://raw.githubusercontent.com/itmorn/AI.handbook/main/DL/torch/nn/Normalization/imgs/GroupNorm.svg">
<img src="./imgs/GroupNorm.svg"
    width="2000" /></a></p>


In [4]:
# Compute GroupNorm with the library implementation.
import torch
import torch.nn as nn

# Input of shape (N=2, C=4, H=2, W=2): two samples with four 2x2 channels each.
input1 = torch.tensor([
    [
        [[1, 6],
         [9, 4]],
        [[12, 18],
         [13, 11]],
        [[1, 2],
         [3, 4]],
        [[5, 6],
         [7, 8]],
    ],
    [
        [[2, 7],
         [3, 8]],
        [[19, 17],
         [15, 11]],
        [[1, 3],
         [5, 7]],
        [[9, 11],
         [13, 15]],
    ]
], dtype=torch.float32)
print("input1:\n", input1, "\n")

# 4 channels split into 2 groups: channels (0, 1) form the first group and
# channels (2, 3) the second; every sample is normalized group by group.
m = nn.GroupNorm(num_groups=2, num_channels=4, eps=1e-05, affine=True)
m.train()

# Build the label from the module's own class name so the printed text always
# names the layer that is actually used (it previously said "nn.BatchNorm2d").
layer_name = f"nn.{type(m).__name__}"
print(f"{layer_name}默认初始化可学习参数γ=1:\n", m.weight, "\n")
print(f"{layer_name}默认初始化可学习参数β=0:\n", m.bias, "\n")

output = m(input1)
print("output:\n", output, "\n")  # matches the manual calculation


input1:
 tensor([[[[ 1.,  6.],
          [ 9.,  4.]],

         [[12., 18.],
          [13., 11.]],

         [[ 1.,  2.],
          [ 3.,  4.]],

         [[ 5.,  6.],
          [ 7.,  8.]]],


        [[[ 2.,  7.],
          [ 3.,  8.]],

         [[19., 17.],
          [15., 11.]],

         [[ 1.,  3.],
          [ 5.,  7.]],

         [[ 9., 11.],
          [13., 15.]]]]) 

nn.BatchNorm2d默认初始化可学习参数γ=1:
 Parameter containing:
tensor([1., 1., 1., 1.], requires_grad=True) 

nn.BatchNorm2d默认初始化可学习参数β=0:
 Parameter containing:
tensor([0., 0., 0., 0.], requires_grad=True) 

output:
 tensor([[[[-1.6199, -0.6381],
          [-0.0491, -1.0308]],

         [[ 0.5400,  1.7181],
          [ 0.7363,  0.3436]],

         [[-1.5275, -1.0911],
          [-0.6547, -0.2182]],

         [[ 0.2182,  0.6547],
          [ 1.0911,  1.5275]]],


        [[[-1.3908, -0.5479],
          [-1.2222, -0.3793]],

         [[ 1.4751,  1.1379],
          [ 0.8008,  0.1264]],

         [[-1.5275, -1.0911],
       

In [8]:
# Manual calculation.
import torch

# Constants for the walkthrough: the numerical-stability term and the affine
# parameters at their initial values (scale 1, shift 0).
eps = 1e-5
γ = 1
β = 0

# Handling every group programmatically is verbose and hard to follow, so only
# the first group (channels 0 and 1 of the first sample) is worked through by
# hand here; that is enough to confirm the library result.
input_part = torch.tensor(
    [[[1, 6], [9, 4]],
     [[12, 18], [13, 11]]],
    dtype=torch.float32,
)

print("input_part:\n", input_part, "\n")

# Step 1: mean and (biased) variance over every element of the group.
VarX, EX = torch.var_mean(input_part, unbiased=False)
print("Ex:\n", EX, "\n")
print("VarX:\n", VarX, "\n")

# Step 2: centre the data by subtracting the mean.
result2 = torch.sub(input_part, EX)
print("input1-Ex:\n", result2, "\n")

# Step 3: the denominator sqrt(VarX + eps).
result3 = (VarX + eps).sqrt()
print("sqrt(VarX+eps):\n", result3, "\n")

# Step 4: divide step 2 by step 3, which normalizes the values of the group.
result4 = torch.div(result2, result3)
print("(input1-Ex)/sqrt(VarX+eps):\n", result4, "\n")

# Step 5: apply the affine re-scaling with γ=1 and β=0.
result5 = torch.add(torch.mul(result4, γ), β)
print("[(input1-Ex)/sqrt(VarX+eps)] * γ + β:\n", result5, "\n")  # matches the diagram


input_part:
 tensor([[[ 1.,  6.],
         [ 9.,  4.]],

        [[12., 18.],
         [13., 11.]]]) 

Ex:
 tensor(9.2500) 

VarX:
 tensor(25.9375) 

input1-Ex:
 tensor([[[-8.2500, -3.2500],
         [-0.2500, -5.2500]],

        [[ 2.7500,  8.7500],
         [ 3.7500,  1.7500]]]) 

sqrt(VarX+eps):
 tensor(5.0929) 

(input1-Ex)/sqrt(VarX+eps):
 tensor([[[-1.6199, -0.6381],
         [-0.0491, -1.0308]],

        [[ 0.5400,  1.7181],
         [ 0.7363,  0.3436]]]) 

[(input1-Ex)/sqrt(VarX+eps)] * γ + β:
 tensor([[[-1.6199, -0.6381],
         [-0.0491, -1.0308]],

        [[ 0.5400,  1.7181],
         [ 0.7363,  0.3436]]]) 

