## BatchNorm

1D: [N, **d**] (or [N, **C**, L])      $d$ 个 $\mu, \sigma^2$（统计量）和 $d$ 个 $\gamma, \beta$（可学习参数，即 `weight` / `bias`）

2D: [N, **C**, H, W]

3D: [N, **C**, D, H, W]

In [None]:
# 1D: BatchNorm1d applied to a [B, d] input
import torch
from torch import nn

x = torch.randn(4, 5)   # [B, d]
m = nn.BatchNorm1d(5)

# Show the affine parameters (weight, bias) and the raw input first.
for shown in (m.weight, m.bias, x):
    print(shown)

# Forward pass, then the normalized result and its shape.
output = m(x)
for shown in (output, output.shape):
    print(shown)

# Per-feature statistics of the output.
# Mnemonic: `dim` names the dimension that is reduced away.
print(output.mean(dim=0))
print(output.std(dim=0, unbiased=False))

# Manual reference: batch mean and sqrt(biased variance + 1e-5) per feature.
mean = x.mean(dim=0)
std = (x.var(dim=0, unbiased=False) + 1e-5).sqrt()

print((x - mean) / std)


Parameter containing:
tensor([1., 1., 1., 1., 1.], requires_grad=True)
Parameter containing:
tensor([0., 0., 0., 0., 0.], requires_grad=True)
tensor([[-0.7035, -1.0251,  0.1915,  0.3051, -1.1906],
        [-1.0093,  0.1770, -0.4498, -0.7264, -0.6747],
        [ 0.2758, -1.6039,  1.9133,  3.1144, -0.9229],
        [-1.2051,  1.2022, -0.4152,  1.8852, -1.4377]])
tensor([[-0.0755, -0.6568, -0.1233, -0.5714, -0.4691],
        [-0.6125,  0.4511, -0.7913, -1.2735,  1.3353],
        [ 1.6445, -1.1902,  1.6698,  1.3408,  0.4672],
        [-0.9564,  1.3959, -0.7552,  0.5041, -1.3334]],
       grad_fn=<NativeBatchNormBackward0>)
torch.Size([4, 5])
tensor([-5.9605e-08,  0.0000e+00, -2.9802e-08, -1.4901e-08,  1.7881e-07],
       grad_fn=<MeanBackward1>)
tensor([1.0000, 1.0000, 1.0000, 1.0000, 0.9999], grad_fn=<StdBackward0>)
tensor([[-0.0755, -0.6568, -0.1233, -0.5714, -0.4691],
        [-0.6125,  0.4511, -0.7913, -1.2735,  1.3353],
        [ 1.6445, -1.1902,  1.6698,  1.3408,  0.4672],
        [-

In [29]:
# 2D: BatchNorm2d applied to a [B, C, H, W] input

import torch
from torch import nn

x = torch.randn(2, 3, 2, 2) # [B, C, H, W]
print(x)

m = nn.BatchNorm2d(3)

print(m.weight)

output = m(x)
# Inspect the layer output
print("Output shape:", output.shape)
print("Output:", output)

# Check: per-channel mean and std of the output, reduced over B, H, W
mean_per_channel = output.mean(dim=(0, 2, 3), keepdim=True)
std_per_channel = output.std(dim=(0, 2, 3), keepdim=True, unbiased=False)

print("Mean per channel:", mean_per_channel)
print(std_per_channel.shape)
print("Std per channel:", std_per_channel)

# Manual reference: per-channel batch mean and sqrt(biased variance + eps).
# Use the layer's own eps so the check stays valid if eps is changed.
x_mean = torch.mean(x, dim=(0, 2, 3), keepdim=True)
x_std = torch.sqrt(m.eps + torch.var(x, dim=(0, 2, 3), unbiased=False, keepdim=True))

my_output = (x - x_mean) / x_std

# Print the manual result (not `output`) so the two can actually be compared.
print("my output", my_output)

# weight is all ones here (printed above), so the manual normalization
# should match the layer output up to floating-point error.
assert torch.allclose(output.detach(), my_output, atol=1e-5)


tensor([[[[-1.9415,  1.0114],
          [ 0.5231,  1.6091]],

         [[ 0.3676,  1.7730],
          [-0.3558,  0.7318]],

         [[-0.5402,  0.4773],
          [ 0.6035, -1.9266]]],


        [[[ 2.7025, -1.8741],
          [-1.3372, -0.7653]],

         [[-1.9585, -2.9105],
          [-0.5275, -0.1459]],

         [[ 1.0650,  0.7406],
          [ 2.0530,  0.4535]]]])
Parameter containing:
tensor([1., 1., 1.], requires_grad=True)
Output shape: torch.Size([2, 3, 2, 2])
Output: tensor([[[[-1.1972,  0.6322],
          [ 0.3296,  1.0024]],

         [[ 0.5375,  1.5504],
          [ 0.0162,  0.8000]],

         [[-0.8258,  0.1016],
          [ 0.2167, -2.0895]]],


        [[[ 1.6798, -1.1554],
          [-0.8229, -0.4686]],

         [[-1.1389, -1.8250],
          [-0.1075,  0.1674]],

         [[ 0.6374,  0.3417],
          [ 1.5379,  0.0800]]]], grad_fn=<NativeBatchNormBackward0>)
Mean per channel: tensor([[[[-7.4506e-09]],

         [[ 2.9802e-08]],

         [[ 0.0000e+00]]]], grad

In [None]:
# 1D with a length axis: BatchNorm1d applied to a 3-D [B, C, L] input

import torch
from torch import nn

x = torch.randn(2, 3, 2) # [B, C, L]: batch, channels, length
print(x)

m = nn.BatchNorm1d(3)

print(m.weight)

output = m(x)
# Inspect the layer output
print("Output shape:", output.shape)
print("Output:", output)

# Check: per-channel mean and std of the output, reduced over B and L
mean_per_channel = output.mean(dim=(0, 2), keepdim=True)
std_per_channel = output.std(dim=(0, 2), keepdim=True, unbiased=False)

print("Mean per channel:", mean_per_channel)
print(std_per_channel.shape)
print("Std per channel:", std_per_channel)

# Manual reference: per-channel batch mean and sqrt(biased variance + eps).
# Use the layer's own eps so the check stays valid if eps is changed.
x_mean = torch.mean(x, dim=(0, 2), keepdim=True)
x_std = torch.sqrt(m.eps + torch.var(x, dim=(0, 2), unbiased=False, keepdim=True))

my_output = (x - x_mean) / x_std

# Print the manual result (not `output`) so the two can actually be compared.
print("my output", my_output)

# weight is all ones here (printed above), so the manual normalization
# should match the layer output up to floating-point error.
assert torch.allclose(output.detach(), my_output, atol=1e-5)


tensor([[[ 0.0117, -1.0647],
         [-0.3778, -0.2873],
         [ 0.3219, -0.3013]],

        [[-0.0512, -1.4496],
         [-0.4920,  0.7124],
         [ 2.3050, -0.5957]]])
Parameter containing:
tensor([1., 1., 1.], requires_grad=True)
Output shape: torch.Size([2, 3, 2])
Output: tensor([[[ 1.0257, -0.6724],
         [-0.5544, -0.3661],
         [-0.0978, -0.6490]],

        [[ 0.9264, -1.2796],
         [-0.7917,  1.7122],
         [ 1.6560, -0.9093]]], grad_fn=<NativeBatchNormBackward0>)
Mean per channel: tensor([[[2.9802e-08],
         [1.4901e-08],
         [0.0000e+00]]], grad_fn=<MeanBackward1>)
torch.Size([1, 3, 1])
Std per channel: tensor([[[1.0000],
         [1.0000],
         [1.0000]]], grad_fn=<StdBackward0>)
my output tensor([[[ 1.0257, -0.6724],
         [-0.5544, -0.3661],
         [-0.0978, -0.6490]],

        [[ 0.9264, -1.2796],
         [-0.7917,  1.7122],
         [ 1.6560, -0.9093]]], grad_fn=<NativeBatchNormBackward0>)
