In [3]:
# 一维情况
import torch
def simple_batch_norm_1d(x,gamma,beta):
    eps=1e-5
    x_mean=torch.mean(x,dim=0,keepdim=True)
    x_var=torch.mean((x - x_mean)** 2,dim=0,keepdim=True)
    x_hat=(x - x_mean)/ torch.sqrt(x_var + eps)
    return gamma.view_as(x_mean)* x_hat + beta.view_as(x_mean)

In [4]:
# 验证一下对于任意输入，输出会被标准化
x=torch.arange(15).view(5,3)
gamma=torch.ones(x.shape[1])
beta=torch.zeros(x.shape[1])
print(x)
y=simple_batch_norm_ld(x,gamma,beta)
print(y)

tensor([[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8],
        [ 9, 10, 11],
        [12, 13, 14]])


RuntimeError: Can only calculate the mean of floating types. Got Long instead.

In [18]:
#测试的时候用训练的移动平均均值和方差
def batch_norm_ld(x,gamma,beta,is_training,moving_mean,moving_var,moving_momentum=0.1):
    eps=1e-5
    x_mean=torch.mean(x,dim=0,keepdim=True)
    x_var=torch.mean((x-x_mean)**2,dim=0,keepdim=True)
    if is_training:
        x_hat=(x-x_mean)/torch.sqrt(x_var+eps)
        moving_mean[:]=moving_momentum*moving_mean+(1-moving_momentum)*x_mean
        moving_var[:]=moving_momentum*moving_var+(1-moving_momentum)*x_var
    else:
        x_hat=(x-moving_mean)/torch.sqrt(moving_var+eps)
        return gamma.view_as(x_mean)*x_hat+beat.view_as(x_mean)


In [19]:
# 使用minist 训练
import numpy as np
from torchvision.datasets import mnist
from torch.utils.data import DataLoader
from torch import nn
from torch.autograd import Variable



In [25]:
train_set=mnist.MNIST("./data",train=True)
test_set=mnist.MNIST("./data",train=False)
def data_tf(x):
    x=np.array(x,dtype="float32")/255
    x=(x-0.5)/0.5# 数据标准化
    x=x.reshape((-1,)) # 拉平
    x=torch.from_numpy(x)
    return x

train_set=mnist.MNIST("./data",train=True,transform=data_tf,download=True)
test_set=mnist.MNIST("./data",train=True,transform=data_tf,download=True)
train_data=DataLoader(train_set,batch_size=64,shuffle=True)
test_data=DataLoader(test_set,batch_size=64,shuffle=False)

class multi_network(nn.Module):
    def __init__(self):
        super(multi_network,self).__init__()
        self.layer1=nn.Linear(784,100)
        self.relu=nn.ReLU(True)
        self.layer2=nn.Linear(100,10)
        self.gemma=nn.Parameter(torch.randn(100))
        self.beta=nn.Parameter(torch.randn(100))
        self.moving_mean=Variable(torch.zeros(100))
        self.moving_var=Variable(torch.zeros(100))
    def forward(self,x,is_train=True):
        x=self.layer1(x)
        x=batch_norm_ld(x,self.gemma,self.beta,is_train,self.moving_mean,self.moving_var)
        x=self.relu(x)
        x=self.layer2(x)
        return x
    

In [26]:
net=multi_network()

In [27]:
# 定义损失函数
criterion=nn.CrossEntropyLoss()
optimizer=torch.optim.SGD(net.parameters(),1e-1) # 使用梯度下降，学习率0.1

In [24]:
train_data

<torch.utils.data.dataloader.DataLoader at 0x17067e06898>

In [23]:
from utils1 import train
train(net,train_data,test_data,10,optimizer,criterion)

TypeError: threshold_(): argument 'input' (position 1) must be Tensor, not NoneType

In [28]:
# 这里的gamma,beta 都是作为参数进行训练，初始化为随机的高斯分别，moving_mean和moving_var 都是初始化为0，并不是更新的参数，训练完10次之后，可以看
#看移动平均和移动方差被修改为了多少
print(net.moving_mean[:10])

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])


In [29]:
# 不使用批标准
no_bn_net=nn.Sequential(
    nn.Linear(784,100),
    nn.ReLU(True),
    nn.Linear(100,10)


)

optimizer=torch.optim.SGD(no_bn_net.parameters(),1e-1) # 使用随机梯度下降，学习率0.1
train(no_bn_net,train_data,test_data,10,optimizer,criterion)

IndexError: invalid index of a 0-dim tensor. Use tensor.item() to convert a 0-dim tensor to a Python number

In [30]:
# 使用批标准化能快速收敛，
# 内置的批标准化函数，一维， torch.nn.BatchNorm1d()和 torch.nn.BatchNorm2d() 
# pytorch 会把 gemma ，beta 作为训练的参数，也将moving_mean和moving_var 作为参数进行训练


In [31]:
# pytorch 中的例子

def data_tf(x):
    x=np.array(x,dtype="float32")/255
    x=(x-0.5)/0.5# 数据标准化
    x=torch.from_numpy(x)
    x=x.unsqueeze(0)
    return x

train_set=mnist.MNIST("./data",train=True,transform=data_tf,download=True)
test_set=mnist.MNIST("./data",train=True,transform=data_tf,download=True)
train_data=DataLoader(train_set,batch_size=64,shuffle=True)
test_data=DataLoader(test_set,batch_size=64,shuffle=False)


In [32]:
# 使用批标准化
class conv_bn_net(nn.Module):
    def __init__(self):
        super(conv_bn_net,self).__init__()
        self.stage1=nn.Sequential(
            nn.Conv2d(1,6,3,padding=1),
            nn.BatchNorm2d(6),
            nn.ReLU(True),
            nn.MaxPool2d(2,2),
            nn.Conv2d(6,16,5),
            nn.BatchNorm2d(16),
            nn.ReLU(True),
            nn.MaxPool2d(2,2)
        
        )
        self.classfy=nn.Linear(400,10)
    def forward(self,x):
        x=self.stage1(x)
        x=x.view(x.shape[0],-1)
        x=self.classfy(x)
        return x
net=conv_bn_net()
optimizer=torch.optim.SGD(net.parameters(),1e-1)

In [33]:
train(net,train_data,test_data,5,optimizer,criterion)

IndexError: invalid index of a 0-dim tensor. Use tensor.item() to convert a 0-dim tensor to a Python number