# 多层全连接神经网络实现MNIST手写数字分类

关键词：线性模型 Linear、激活函数 ReLU、批标准化 BatchNorm1d、数据加载

#### 设置超参数（Hyperparameters）

In [3]:
# Hyperparameters shared by the cells below.
batch_size = 64   # number of samples in each training batch
learning_rate = 1e-2  # step size handed to the SGD optimizers
num_epoch = 20  # NOTE(review): not referenced by the training cells shown here — confirm whether it is still needed

#### 加载数据

In [4]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# Preprocessing: ToTensor scales pixel values to the range 0~1, then
# Normalize([0.5], [0.5]) computes (x - 0.5) / 0.5, which maps them to -1~1.
data_tf = transforms.Compose([transforms.ToTensor(), transforms.Normalize([0.5], [0.5])])

# `train` selects which split is loaded: True -> training set, False -> test set.
# download=True fetches the files into `root` when they are missing, so this
# cell also works on a machine where ./data has not been populated yet;
# files that are already present are reused.
train_dataset = datasets.MNIST(root='./data', train=True, transform=data_tf, download=True)
test_dataset  = datasets.MNIST(root='./data', train=False, transform=data_tf, download=True)

# Only the training batches are shuffled; the test set is read in a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_dataset,  batch_size=batch_size, shuffle=False)

#### 三种模型

In [7]:
from torch import nn

1. 简单的三层全连接神经网络

In [8]:
class simpleNet(nn.Module):
    """Three linear layers applied back to back, with no activation in between.

    Args:
        in_dim: number of input features per sample.
        n_hidden_1: output width of the first linear layer.
        n_hidden_2: output width of the second linear layer.
        out_dim: output width of the last linear layer.
    """

    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super().__init__()
        self.layer1 = nn.Linear(in_features=in_dim, out_features=n_hidden_1)
        self.layer2 = nn.Linear(in_features=n_hidden_1, out_features=n_hidden_2)
        self.layer3 = nn.Linear(in_features=n_hidden_2, out_features=out_dim)

    def forward(self, x):
        """Feed ``x`` through layer1, layer2 and layer3 and return the result."""
        hidden_1 = self.layer1(x)
        hidden_2 = self.layer2(hidden_1)
        return self.layer3(hidden_2)

2. 添加激活函数

In [141]:
class Activation_Net(nn.Module):
    """Three-layer fully connected net with ReLU after the two hidden layers.

    The last layer is a bare linear layer (no activation).

    Args:
        in_dim: number of input features per sample.
        n_hidden_1: output width of the first linear layer.
        n_hidden_2: output width of the second linear layer.
        out_dim: output width of the last linear layer.
    """

    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super().__init__()
        # inplace=True makes ReLU overwrite its input tensor instead of
        # allocating a new one, which saves memory.
        self.layer1 = nn.Sequential(
            nn.Linear(in_dim, n_hidden_1),
            nn.ReLU(inplace=True),
        )
        self.layer2 = nn.Sequential(
            nn.Linear(n_hidden_1, n_hidden_2),
            nn.ReLU(inplace=True),
        )
        self.layer3 = nn.Sequential(
            nn.Linear(n_hidden_2, out_dim),
        )

    def forward(self, x):
        """Feed ``x`` through layer1, layer2 and layer3 and return the result."""
        for stage in (self.layer1, self.layer2, self.layer3):
            x = stage(x)
        return x

3. 添加批标准化

In [137]:
class Batch_Net(nn.Module):
    """Three-layer fully connected net with batch normalisation and ReLU.

    Each of the two hidden stages is Linear -> BatchNorm1d -> ReLU; the last
    layer is a bare linear layer.

    Args:
        in_dim: number of input features per sample.
        n_hidden_1: output width of the first linear layer.
        n_hidden_2: output width of the second linear layer.
        out_dim: output width of the last linear layer.
    """

    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super().__init__()
        # Note: the "1" in BatchNorm1d is the digit one, not the letter "l".
        first_stage = [
            nn.Linear(in_dim, n_hidden_1),
            nn.BatchNorm1d(n_hidden_1),
            nn.ReLU(inplace=True),
        ]
        self.layer1 = nn.Sequential(*first_stage)

        second_stage = [
            nn.Linear(n_hidden_1, n_hidden_2),
            nn.BatchNorm1d(n_hidden_2),
            nn.ReLU(inplace=True),
        ]
        self.layer2 = nn.Sequential(*second_stage)

        self.layer3 = nn.Sequential(nn.Linear(n_hidden_2, out_dim))

    def forward(self, x):
        """Feed ``x`` through layer1, layer2 and layer3 and return the result."""
        return self.layer3(self.layer2(self.layer1(x)))

#### 训练及测试网络

In [10]:
from torch import optim, Tensor
from torch.autograd import Variable

1. 简单的三层全连接神经网络

训练

In [118]:
# 数据格式
# Pull a single batch from the training loader to look at its format.
data = next(iter(train_loader))
img, label = data
# Keep the batch dimension and collapse everything else into one row per image.
img = img.view(img.size(0), -1)
img, label

(tensor([[-1., -1., -1.,  ..., -1., -1., -1.],
         [-1., -1., -1.,  ..., -1., -1., -1.],
         [-1., -1., -1.,  ..., -1., -1., -1.],
         ...,
         [-1., -1., -1.,  ..., -1., -1., -1.],
         [-1., -1., -1.,  ..., -1., -1., -1.],
         [-1., -1., -1.,  ..., -1., -1., -1.]]),
 tensor([2, 7, 6, 7, 4, 3, 8, 0, 2, 3, 9, 4, 1, 5, 5, 7, 6, 1, 6, 2, 5, 0, 4, 1,
         7, 5, 6, 3, 1, 2, 2, 3, 8, 4, 5, 1, 3, 0, 2, 6, 6, 9, 6, 6, 2, 6, 3, 1,
         2, 9, 6, 6, 3, 3, 9, 9, 4, 1, 4, 0, 3, 1, 2, 5]))

In [120]:
import torch

In [123]:
# Turn the integer class labels into one-hot rows (one row per sample, one
# column per class). A single vectorized call replaces the per-sample loop
# that grew the result with repeated torch.cat, and it also handles an empty
# batch. `.float()` keeps the float dtype that torch.zeros produced before.
num_classes = 10
real_label = torch.nn.functional.one_hot(label, num_classes=num_classes).float()
print('label:\n{}\nreal_label:\n{}'.format(label, real_label))

label:
tensor([2, 7, 6, 7, 4, 3, 8, 0, 2, 3, 9, 4, 1, 5, 5, 7, 6, 1, 6, 2, 5, 0, 4, 1,
        7, 5, 6, 3, 1, 2, 2, 3, 8, 4, 5, 1, 3, 0, 2, 6, 6, 9, 6, 6, 2, 6, 3, 1,
        2, 9, 6, 6, 3, 3, 9, 9, 4, 1, 4, 0, 3, 1, 2, 5])
real_label:
tensor([[0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0

In [11]:
# Model 1: the plain three-layer linear network, trained with cross-entropy
# loss and SGD at the shared learning rate.
model_1 = simpleNet(in_dim=28 * 28, n_hidden_1=300, n_hidden_2=100, out_dim=10)
criterion_1 = nn.CrossEntropyLoss()
optimizer_1 = optim.SGD(model_1.parameters(), lr=learning_rate)

<font color=red size=5>Q: 为什么label和out的数据格式不同却能算loss</font>
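- 损失函数的参数顺序是 (input, target)，不能颠倒
- 函数内部会自动转换吗？不需要手动转换：`nn.CrossEntropyLoss` 的 input 是形状为 (N, C) 的未归一化得分（logits），target 是形状为 (N,) 的类别索引，所以 label 不必先处理成 one-hot
- 交叉熵到底怎么算的：先对 input 的每一行做 log-softmax，再取 target 所指类别的负对数概率，最后对整个 batch 求平均，即 $\text{loss} = -\frac{1}{N}\sum_{i=1}^{N}\log\frac{\exp(x_{i,y_i})}{\sum_{c}\exp(x_{i,c})}$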

In [134]:
# Train model_1 with mini-batch SGD for at most `num` batches.
model_1.train()
num = 1000  # upper bound on the number of optimisation steps
# NOTE: despite its name, `epoch` counts batches (steps), not passes over the data.
# NOTE(review): the loop also stops when train_loader runs out of batches; the
# log recorded with this cell ends at step 900, so `num` was apparently never
# reached — confirm whether more than one pass over the data was intended.
epoch = 0
for epoch, (img, label) in enumerate(train_loader, start=1):
    # forward
    # Flatten every image to one row so it matches the first Linear layer.
    img = img.view(img.size(0), -1)
    out = model_1(img)
    # The criterion is called with the integer labels exactly as loaded;
    # no one-hot conversion is done here.
    loss = criterion_1(out, label)

    # backward
    optimizer_1.zero_grad()
    loss.backward()
    optimizer_1.step()

    if epoch % 100 == 0:
        print('epoch: {}/{}, loss: {:.6f}'.format(epoch, num, loss.item()))

    if epoch == num:
        break

epoch: 100/1000, loss: 1.670849
epoch: 200/1000, loss: 0.846519
epoch: 300/1000, loss: 0.721024
epoch: 400/1000, loss: 0.626457
epoch: 500/1000, loss: 0.577877
epoch: 600/1000, loss: 0.405546
epoch: 700/1000, loss: 0.593416
epoch: 800/1000, loss: 0.553243
epoch: 900/1000, loss: 0.401298


In [14]:
# Run one training batch through model_1 and show the inputs, labels and loss.
data = next(iter(train_loader))
img, label = data
# One flattened row per image.
img = img.view(img.size(0), -1)

out = model_1(img)
loss = criterion_1(out, label)

print('img:\n', img)
print('lebel:\n', label)
print(loss)

img:
 tensor([[-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        ...,
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.]])
lebel:
 tensor([7, 3, 9, 0, 2, 3, 5, 5, 5, 8, 8, 8, 5, 5, 0, 2, 6, 2, 7, 0, 4, 3, 8, 2,
        4, 0, 9, 1, 2, 5, 1, 0, 7, 5, 3, 4, 1, 8, 2, 0, 5, 0, 5, 8, 6, 3, 4, 8,
        5, 3, 2, 8, 8, 3, 5, 6, 1, 0, 4, 6, 1, 2, 3, 3])
tensor(2.3333, grad_fn=<NllLossBackward>)


In [78]:
from torch import max

<font color=blue>torch.max(out, 1) 返回张量 out 每一行的最大元素，以及该元素所在的列索引</font>

In [135]:
# Measure the accuracy of model_1 on the test set.
model_1.eval()
acc = 0  # running count of correctly classified test samples
# Inference only: no_grad() skips building the autograd graph for each batch.
with torch.no_grad():
    for img, label in test_loader:
        # Flatten every image to one row, as in training.
        img = img.view(img.size(0), -1)
        out = model_1(img)
        # torch.max over dim 1 returns (row maxima, their column indices);
        # the column index is the predicted class. Calling it through the
        # torch namespace avoids depending on a `max` name that shadows the
        # Python builtin.
        _, pred = torch.max(out, 1)
        acc += (pred == label).sum().item()
print('acc : {:.6f}'.format(acc/len(test_dataset)))

acc : 0.891100


In [110]:
# Print the accuracy again from the `acc` count currently in scope.
accuracy = acc / len(test_dataset)
print('acc : {:.6f}'.format(accuracy))

acc : 0.894900


2. 添加激活函数

<font color=blue>Q:加了激活函数反而准确率下降了 </font>
- 起初认为是 label 需要先处理成 one-hot 形式，但处理之后反而运行出错
- <font color=red>A: 原因是当时的网络结构错误地在最后一层之后也加了激活函数（上面的 Activation_Net 最后一层已不带激活函数）</font>

In [142]:
# Model 2: the network with ReLU activations, trained with cross-entropy
# loss and SGD at the shared learning rate.
model_2 = Activation_Net(in_dim=28 * 28, n_hidden_1=300, n_hidden_2=100, out_dim=10)
criterion_2 = nn.CrossEntropyLoss()
optimizer_2 = optim.SGD(model_2.parameters(), lr=learning_rate)

In [143]:
# Train model_2 with mini-batch SGD for at most `num` batches.
model_2.train()
num = 1000  # upper bound on the number of optimisation steps
# NOTE: despite its name, `epoch` counts batches (steps), not passes over the data.
# NOTE(review): the loop also stops when train_loader runs out of batches; the
# log recorded with this cell ends at step 900, so `num` was apparently never
# reached — confirm whether more than one pass over the data was intended.
epoch = 0
for epoch, (img, label) in enumerate(train_loader, start=1):
    # forward
    # Flatten every image to one row so it matches the first Linear layer.
    img = img.view(img.size(0), -1)
    out = model_2(img)
    # The criterion is called with the integer labels exactly as loaded.
    loss = criterion_2(out, label)

    # backward
    optimizer_2.zero_grad()
    loss.backward()
    optimizer_2.step()

    if epoch % 100 == 0:
        print('epoch: {}/{}, loss: {:.6f}'.format(epoch, num, loss.item()))

    if epoch == num:
        break

epoch: 100/1000, loss: 2.021912
epoch: 200/1000, loss: 1.539746
epoch: 300/1000, loss: 1.087095
epoch: 400/1000, loss: 0.856249
epoch: 500/1000, loss: 0.553302
epoch: 600/1000, loss: 0.473291
epoch: 700/1000, loss: 0.506441
epoch: 800/1000, loss: 0.301461
epoch: 900/1000, loss: 0.348901


In [144]:
# Measure the accuracy of model_2 on the test set.
model_2.eval()
acc = 0  # running count of correctly classified test samples
# Inference only: no_grad() skips building the autograd graph for each batch.
with torch.no_grad():
    for img, label in test_loader:
        # Flatten every image to one row, as in training.
        img = img.view(img.size(0), -1)
        out = model_2(img)
        # torch.max over dim 1 returns (row maxima, their column indices);
        # the column index is the predicted class. Calling it through the
        # torch namespace avoids depending on a `max` name that shadows the
        # Python builtin.
        _, pred = torch.max(out, 1)
        acc += (pred == label).sum().item()
print(acc/len(test_dataset))

0.887


3. 添加批标准化

In [138]:
# Model 3: the network with batch normalisation, trained with cross-entropy
# loss and SGD at the shared learning rate.
model_3 = Batch_Net(in_dim=28 * 28, n_hidden_1=300, n_hidden_2=100, out_dim=10)
criterion_3 = nn.CrossEntropyLoss()
optimizer_3 = optim.SGD(model_3.parameters(), lr=learning_rate)

In [139]:
# Train model_3 with mini-batch SGD for at most `num` batches.
model_3.train()
num = 1000  # upper bound on the number of optimisation steps
# NOTE: despite its name, `epoch` counts batches (steps), not passes over the data.
# NOTE(review): the loop also stops when train_loader runs out of batches; the
# log recorded with this cell ends at step 900, so `num` was apparently never
# reached — confirm whether more than one pass over the data was intended.
epoch = 0
for epoch, (img, label) in enumerate(train_loader, start=1):
    # forward
    # Flatten every image to one row so it matches the first Linear layer.
    img = img.view(img.size(0), -1)
    out = model_3(img)
    # The criterion is called with the integer labels exactly as loaded.
    loss = criterion_3(out, label)

    # backward
    optimizer_3.zero_grad()
    loss.backward()
    optimizer_3.step()

    if epoch % 100 == 0:
        print('epoch: {}/{}, loss: {:.6f}'.format(epoch, num, loss.item()))

    if epoch == num:
        break

epoch: 100/1000, loss: 1.392313
epoch: 200/1000, loss: 1.110351
epoch: 300/1000, loss: 0.909655
epoch: 400/1000, loss: 0.595646
epoch: 500/1000, loss: 0.602998
epoch: 600/1000, loss: 0.356907
epoch: 700/1000, loss: 0.416577
epoch: 800/1000, loss: 0.354607
epoch: 900/1000, loss: 0.310528


In [140]:
# Measure the accuracy of model_3 on the test set.
# eval() is called before inference on this BatchNorm model.
model_3.eval()
acc = 0  # running count of correctly classified test samples
# Inference only: no_grad() skips building the autograd graph for each batch.
with torch.no_grad():
    for img, label in test_loader:
        # Flatten every image to one row, as in training.
        img = img.view(img.size(0), -1)
        out = model_3(img)
        # torch.max over dim 1 returns (row maxima, their column indices);
        # the column index is the predicted class. Calling it through the
        # torch namespace avoids depending on a `max` name that shadows the
        # Python builtin.
        _, pred = torch.max(out, 1)
        acc += (pred == label).sum().item()
print(acc/len(test_dataset))

0.9417
