# Pytorch 全相连网络(Full Connected Network)

In [1]:
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from six.moves import xrange

导入MNIST数据集

In [2]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.utils.data as Data
import torch.nn.functional as F
import torchvision

# hyper-parameter
training_epoch=100
num_classes = 10
learning_rate=1e-3
batch_size=1000

# 查看Pytorch是否支持GPU
GPU_FLAG = torch.cuda.is_available()
print('CUDA available?', GPU_FLAG)

# Mnist digits dataset
train_data = torchvision.datasets.MNIST(
    root='data/',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

test_data = torchvision.datasets.MNIST(
    root='data/',
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

train_loader = Data.DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
test_loader = Data.DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)

CUDA available? True


## 正则化实现(regularization)

### L1正则化
`nn.L1Loss()`

### L2正则化
`torch.optim.Adam(weight_decay=1e-8)`


## Batch Norm
``
nn.BatchNorm1d(input_tensor)
nn.BatchNorm2d(input_tensor)
nn.BatchNorm3d(input_tensor)
``

创建网络

In [3]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.fc1 = nn.Linear(28 * 28, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        out = F.relu(self.fc1(x))
        out = F.softmax(self.fc2(out))
        return out

net=Net()

# 将模型的参数送到GPU中
if GPU_FLAG==True:
    net.cuda()
print(net) # 输出模型结构

criterion = nn.CrossEntropyLoss() # 定义loss函数
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

Net(
  (fc1): Linear(in_features=784, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)


检验 estimaton

In [4]:
def evaluate():
    correct = 0
    total = 0
    for images, labels in test_loader:
        if GPU_FLAG:
            images = Variable(images.view(-1, 28 * 28)).cuda()
            labels = labels.cuda()
        else:
            images = Variable(images.view(-1, 28 * 28))
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()

    return float(correct) / total

training

In [5]:
for i in xrange(1, 1 + training_epoch):
    cost = 0.0
    for _, (images, labels) in enumerate(train_loader):
        if GPU_FLAG:
            x = Variable(images).view(-1, 28 ** 2).cuda()
            y = Variable(labels).cuda()
        else:
            x = Variable(images).view(-1, 28 ** 2)
            y = Variable(labels)

        optimizer.zero_grad()
        y_ = net(x)
        loss = criterion(y_, y)
        loss.backward()
        optimizer.step()

        cost += loss
    cost /= len(train_loader)
    if i % 10 == 0:
        print('Epoch %s / %s, training loss: %s, test accuracy: %s' %
              (i, training_epoch, float(cost), evaluate()))


Epoch 10 / 100, training loss: 0.09600746631622314, test accuracy: 0.9760833333333333
Epoch 20 / 100, training loss: 0.04198797419667244, test accuracy: 0.9907666666666667
Epoch 30 / 100, training loss: 0.02025996521115303, test accuracy: 0.9967
Epoch 40 / 100, training loss: 0.010129672475159168, test accuracy: 0.99945
Epoch 50 / 100, training loss: 0.0055337222293019295, test accuracy: 0.9997333333333334
Epoch 60 / 100, training loss: 0.002830269280821085, test accuracy: 0.9999666666666667
Epoch 70 / 100, training loss: 0.0016713348450139165, test accuracy: 1.0
Epoch 80 / 100, training loss: 0.0010266865137964487, test accuracy: 1.0
Epoch 90 / 100, training loss: 0.0006690657464787364, test accuracy: 1.0
Epoch 100 / 100, training loss: 0.00043571399874053895, test accuracy: 1.0
