# 多层全连接神经网络实现MNIST手写数字分类

关键词：线性模型Linear、激活函数ReLU、批标准化BatchNormld、数据加载

#### 设置超参数（Hyperparameters）

In [4]:
batch_size = 64   #每一次训练的样本数量
learning_rate = 1e-2
num_epoch = 20

#### 加载数据

In [5]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
data_tf = transforms.Compose([transforms.ToTensor(), transforms.Normalize([0.5], [0.5])])

# train属性是区别并对应加载训练集和测试机
train_dataset = datasets.MNIST(root='./data', train=True, transform=data_tf)
test_dataset  = datasets.MNIST(root='./data', train=False, transform=data_tf)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_dataset,  batch_size=batch_size, shuffle=False)

#### 三种模型

In [6]:
from torch import nn

1. 简单的三层全连接神经网络

In [17]:
class simpleNet(nn.Module):
    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(simpleNet, self).__init__()
        self.layer1 = nn.Linear(in_dim, n_hidden_1)
        self.layer2 = nn.Linear(n_hidden_1, n_hidden_2)
        self.layer3 = nn.Linear(n_hidden_2, out_dim)
        
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x

2. 添加激活函数

In [70]:
class Activation_Net(nn.Module):
    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(Activation_Net, self).__init__()
        # inplace=True是指对输入数据进行原地改变，不使用新的变量，节省内存空间
        self.layer1 = nn.Sequential(nn.Linear(in_dim, n_hidden_1), nn.ReLU(inplace=True)) 
        self.layer2 = nn.Sequential(nn.Linear(n_hidden_1, n_hidden_2), nn.ReLU(inplace=True)) 
        self.layer3 = nn.Sequential(nn.Linear(n_hidden_2, out_dim), nn.ReLU(inplace=True)) 
        
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x

3. 添加批标准化

In [35]:
class Batch_Net(nn.Module):
    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(Batch_Net, self).__init__()
        # BatchNorm1d 中 '1' 是数字1
        self.layer1 = nn.Sequential(
            nn.Linear(in_dim, n_hidden_1), 
            nn.BatchNorm1d(n_hidden_1),
            nn.ReLU(True)) 
        self.layer2 = nn.Sequential(
            nn.Linear(n_hidden_1, n_hidden_2), 
            nn.BatchNorm1d(n_hidden_2),
            nn.ReLU(inplace=True)) 
        self.layer1 = nn.Sequential(nn.Linear(n_hidden_2, out_dim), nn.ReLU(inplace=True)) 
        
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x

#### 训练及测试网络

In [40]:
from torch import optim, Tensor
from torch.autograd import Variable

1. 简单的三层全连接神经网络

训练

In [44]:
# 数据格式
for data in train_loader:
    img, label = data
    img = img.view(img.size(0), -1)
    print('img:\n',img)
    print('label:\b',label)
    break
img, data

img:
 tensor([[-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        ...,
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.]])
label: tensor([0, 1, 6, 7, 0, 0, 3, 8, 4, 4, 5, 2, 9, 5, 8, 9, 6, 8, 7, 3, 8, 8, 0, 1,
        7, 8, 8, 5, 2, 6, 7, 2, 3, 5, 0, 4, 8, 1, 8, 9, 5, 1, 3, 1, 1, 4, 8, 7,
        8, 0, 0, 0, 1, 1, 2, 9, 4, 6, 1, 8, 8, 5, 5, 9])


In [57]:
for data in train_loader:
    img, label = data
    break;
img = img.view(img.size(0), -1)    
img = Variable(img)
label = Variable(label)
out = model_1(img)
print(label)
print(out)

tensor([5, 5, 9, 2, 3, 2, 1, 6, 6, 1, 4, 9, 9, 6, 1, 0, 0, 0, 4, 1, 9, 7, 8, 3,
        7, 3, 4, 6, 4, 5, 5, 5, 9, 7, 7, 6, 1, 4, 7, 7, 9, 2, 0, 1, 2, 1, 7, 9,
        7, 1, 0, 8, 7, 1, 5, 6, 7, 2, 9, 6, 8, 6, 9, 0])
tensor([[-1.2809e+00, -1.2003e-01,  3.5806e-01,  1.6877e+00, -1.9977e+00,
          3.4084e+00, -3.4300e-01, -1.5197e+00,  4.6814e-01, -1.2811e+00],
        [ 2.5626e+00, -4.6572e+00, -3.5582e+00,  1.0630e+00, -1.8602e+00,
          1.1259e+01, -5.1556e+00, -6.6359e+00,  6.7563e+00, -1.9455e+00],
        [-3.9840e+00, -6.8104e+00,  1.9541e+00, -7.9383e-01,  2.9716e+00,
         -4.4529e+00, -3.6774e+00,  4.4158e+00,  1.7016e+00,  7.8981e+00],
        [-1.0204e+00, -2.4762e+00,  9.8694e+00,  4.3000e-01,  1.0583e+00,
          8.4748e-02,  3.7658e+00, -8.9821e+00,  2.1102e+00, -5.6647e+00],
        [-1.2028e+00, -1.3707e+00,  7.5013e-01,  6.0447e+00, -2.9314e+00,
          4.0909e+00,  2.4539e+00, -6.9472e+00,  1.2055e+00, -2.8189e+00],
        [-1.7025e+00,  1.8573e+00,  1.

In [59]:
loss_1 = criterion_1(out, label)

In [73]:
model_1 = simpleNet(28*28, 300, 100, 10)
criterion_1 = nn.CrossEntropyLoss()
optimizer_1 = optim.SGD(model_1.parameters(), lr=learning_rate)

<font color=red size=5>Q: 为什么label和out的数据格式不同却能算loss</font>
- LOSS参数(input, target), 顺序不能反

In [74]:
num = 1000
epoch = 0
for data in train_loader:
    epoch += 1
    # forward
    img, label = data
    img = img.view(img.size(0), -1)
    
    img = Variable(img)
    label = Variable(label)
    
    out = model_1(img)
    loss = criterion_1(out, label)
    
    #backward
    optimizer_1.zero_grad()
    loss.backward()
    optimizer_1.step()
    
    if epoch % 100 == 0:
        print('epoch: {}/{}, loss: {:.6f}'.format(epoch, num, loss.item()))
    
    if epoch==num:
        break

epoch: 100/1000, loss: 1.480937
epoch: 200/1000, loss: 0.967255
epoch: 300/1000, loss: 0.882846
epoch: 400/1000, loss: 0.631312
epoch: 500/1000, loss: 0.339124
epoch: 600/1000, loss: 0.600461
epoch: 700/1000, loss: 0.508997
epoch: 800/1000, loss: 0.482839
epoch: 900/1000, loss: 0.201672


In [78]:
from torch import max

In [80]:
model_1.eval()
for data in test_loader:
    img, label = data
    break;
img = img.view(img.size(0), -1)
img = Variable(img)
out = model_1(img)
a, pred = max(out, 1)
b = (pred == label).sum()
print('out: \n{}\na:\n{}\npred:\n{}\nb:\n'.format(out,a,pred,b))

out: 
tensor([[ 6.2126e-01, -3.1635e+00, -1.8923e+00,  2.0778e+00, -1.0035e+00,
         -1.7577e-02, -7.4395e+00,  9.8309e+00, -1.5224e+00,  3.9399e+00],
        [ 3.1480e+00, -5.3276e-01,  5.8375e+00,  2.7525e+00, -6.7138e+00,
          3.4681e+00,  4.8478e+00, -9.8556e+00,  2.1796e+00, -7.6293e+00],
        [-3.8118e+00,  5.0861e+00,  9.0560e-01,  4.4369e-01, -2.0335e+00,
          3.3118e-01, -2.9281e-01,  6.5547e-01, -8.2002e-02, -7.8457e-01],
        [ 1.0709e+01, -9.2160e+00,  2.9988e-01, -6.8600e-01, -4.7648e+00,
          3.5720e+00,  7.0587e-01, -1.2019e+00,  1.2818e-01, -1.2861e+00],
        [-9.9028e-01, -4.7108e+00,  1.4316e-01, -2.2846e+00,  4.4996e+00,
         -5.6999e-01,  1.9696e-02,  1.4214e+00, -5.3075e-02,  2.7166e+00],
        [-4.9137e+00,  5.9972e+00,  7.6212e-01,  1.3125e+00, -2.6012e+00,
         -1.8558e-01, -2.1471e+00,  1.6330e+00,  9.4344e-01, -8.4492e-02],
        [-3.4119e+00, -3.9334e+00, -4.3798e+00,  1.0747e-01,  5.8935e+00,
          3.0832e+00, -1.8

2. 添加激活函数

In [75]:
model_2 = Activation_Net(28*28, 300, 100, 10)
criterion_2 = nn.CrossEntropyLoss()
optimizer_2 = optim.SGD(model_2.parameters(), lr = learning_rate)

In [76]:
num = 1000
epoch = 0
for data in train_loader:
    epoch += 1
    # forward
    img, label = data
    img = img.view(img.size(0), -1)
    
    img = Variable(img)
    label = Variable(label)
    
    out = model_2(img)
    loss = criterion_2(out, label)
    
    #backward
    optimizer_2.zero_grad()
    loss.backward()
    optimizer_2.step()
    
    if epoch % 100 == 0:
        print('epoch: {}/{}, loss: {:.6f}'.format(epoch, num, loss.item()))
    
    if epoch==num:
        break

epoch: 100/1000, loss: 2.191514
epoch: 200/1000, loss: 1.933417
epoch: 300/1000, loss: 1.816343
epoch: 400/1000, loss: 1.834052
epoch: 500/1000, loss: 1.209747
epoch: 600/1000, loss: 1.086104
epoch: 700/1000, loss: 0.943458
epoch: 800/1000, loss: 1.344618
epoch: 900/1000, loss: 0.733659
