In [1]:
%matplotlib inline

自定义 - Self-define
====
PyTorch 的 forward 可以自定义前向传播方式（反向传播由 autograd 自动求导）；定义新型网络可以用矩阵运算代替；本节将通过两种方式实现自定义的 loss 函数
>Python2.7 + Pytorch
>
>MNIST

In [2]:
# -*- coding: utf-8 -*-
# !/usr/bin/env python

from __future__ import print_function

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.utils.data as Data
import torch.nn.functional as F
import torchvision

# Hyperparameters
num_classes = 10          # width of the network's output layer (one logit per digit class)
batch_size = 5000         # samples per mini-batch handed to the DataLoaders
training_episodes = 100   # number of passes of the outer training loop

# Probe once for a usable CUDA device and report the result.
GPU_FLAG = torch.cuda.is_available()
print('CUDA available?', GPU_FLAG)

CUDA available? True


In [3]:
# MNIST digits dataset: the training split (train=True) and the held-out
# split (train=False). ToTensor converts each image to a tensor, and
# download=True fetches the files into `root` when they are missing.
train_data = torchvision.datasets.MNIST(
    root='data/',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

test_data = torchvision.datasets.MNIST(
    root='data/',
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

train_loader = Data.DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
# Evaluation must read the held-out split. This loader previously wrapped
# `train_data`, so the reported accuracy was measured on the training images.
test_loader = Data.DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True)

NameError: name 'BATCH_SIZE' is not defined

方案1：定义在函数内部，作为网络的一部分

In [None]:
class Net1(nn.Module):
    """Two-layer fully connected classifier that carries its own loss (approach 1).

    The cross-entropy loss is implemented as a method of the network
    (`get_loss`), and the optimizer is stored on the instance so that
    `train_model` can perform a complete optimisation step.
    """

    def __init__(self):
        super(Net1, self).__init__()

        # 28*28 flattened pixels -> 256 hidden units -> `num_classes` logits.
        self.fc1 = nn.Linear(28 * 28, 256)
        torch.nn.init.xavier_uniform(self.fc1.weight)
        self.fc2 = nn.Linear(256, num_classes)
        torch.nn.init.xavier_uniform(self.fc2.weight)

        # Built after the layers so self.parameters() already contains them.
        self.optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)

    def forward(self, x):
        """Map a batch of flattened images to unnormalised class scores (logits)."""
        out = F.relu(self.fc1(x))
        out = self.fc2(out)
        return out

    def get_loss(self, x, y):
        """Cross-entropy between logits `x` and one-hot targets `y`.

        Args:
            x: logits whose last axis indexes the classes.
            y: one-hot targets with the same shape as `x`.

        Returns:
            Scalar loss: mean over all leading axes of -sum(y * log_softmax(x)).
        """
        # Normalise over the class axis explicitly. The implicit-dim form is
        # deprecated and picks axis 0 for 3-D input, which would disagree with
        # the sum over the last axis below.
        log_probs = F.log_softmax(x, dim=-1)
        return torch.mean(-torch.sum(y * log_probs, -1))

    def train_model(self, x, y):
        """Run one optimisation step on the batch (x, y).

        Returns:
            The loss computed for this batch before the parameter update.
        """
        self.train()
        self.optimizer.zero_grad()
        y_pred = self.forward(x)
        loss = self.get_loss(y_pred, y)
        loss.backward()
        self.optimizer.step()
        return loss

    def get_accuracy(self, loader=None):
        """Classification accuracy over one full pass of `loader`.

        Args:
            loader: iterable yielding (images, labels) batches. Defaults to
                the notebook-level `test_loader`.

        Returns:
            float in [0, 1]: correctly classified samples divided by the total
            number of samples seen; 0.0 if the loader yields nothing. The
            value is also stored in `self.accuracy`.
        """
        if loader is None:
            loader = test_loader
        self.eval()
        # Follow the model's own placement instead of assuming a GPU exists.
        on_gpu = next(self.parameters()).is_cuda
        correct, total = 0, 0
        # Iterate the loader itself: calling __iter__() inside the loop builds
        # a fresh iterator every step and only ever yields its first batch.
        for batch_images, batch_labels in loader:
            batch_images = Variable(batch_images.view(-1, 28 ** 2))
            batch_labels = Variable(batch_labels)
            if on_gpu:
                batch_images = batch_images.cuda()
                batch_labels = batch_labels.cuda()
            pred = self.forward(batch_images)
            hits = torch.max(pred.data, 1)[1] == batch_labels.data
            # Count samples rather than averaging per-batch means, so a
            # smaller final batch is weighted correctly.
            correct += int(hits.sum())
            total += hits.numel()
        # float() keeps true division under Python 2 as well.
        self.accuracy = correct / float(total) if total else 0.0
        return self.accuracy


net1 = Net1()
# Move the model to the GPU only when one is available; an unconditional
# .cuda() raises on CPU-only machines.
if GPU_FLAG:
    net1.cuda()
print(net1)

In [None]:
def one_hot(x, num_classes=num_classes):
    """Convert integer class labels into a one-hot matrix.

    Args:
        x: class indices (a LongTensor or a sequence of ints); flattened so
            that every label becomes one row.
        num_classes: number of columns in the encoding. Defaults to the
            notebook-level `num_classes` hyperparameter (the previous default,
            `N_CLASS`, was never defined and raised NameError).

    Returns:
        Tensor of shape (number of labels, num_classes) holding a single 1 in
        each row and 0 elsewhere.
    """
    indices = torch.LongTensor(x).view(-1, 1)
    # scatter_ writes the value 1 into column indices[i] of row i.
    return torch.zeros(indices.size(0), num_classes).scatter_(1, indices, 1)

In [None]:
# Feed batches on whichever device the model lives on.
on_gpu = next(net1.parameters()).is_cuda

for i in range(1, 1 + training_episodes):
    cost = 0.0
    # Iterate the loader directly so each epoch is one full shuffled pass.
    # Calling train_loader.__iter__().next() inside the loop rebuilt the
    # iterator every step and only ever drew its first batch.
    for images, labels in train_loader:
        x = Variable(images).view(-1, 28 ** 2)
        y = Variable(one_hot(labels))
        if on_gpu:
            x, y = x.cuda(), y.cuda()
        loss = net1.train_model(x=x, y=y)
        # .item() extracts the Python scalar; indexing the 0-dim loss with
        # [0] raises on PyTorch >= 0.5.
        cost += loss.item()
    cost /= len(train_loader)
    if i % 10 == 0:
        print('Epoch %s / %s, training cost: %s, accuracy: %s' % (i, training_episodes, cost, net1.get_accuracy()))

方案二：定义一个单独的Loss类<br>
本质上方案一、二相差不大，但方案二更加规范，源码中采用的是方案二

In [None]:
class myCrossEntropyLoss(nn.Module):
    """Cross-entropy loss for logits against one-hot targets, as a standalone module."""

    def __init__(self):
        super(myCrossEntropyLoss, self).__init__()

    def forward(self, x, y):
        """Compute the mean cross-entropy.

        Args:
            x: logits whose last axis indexes the classes.
            y: one-hot targets with the same shape as `x`.

        Returns:
            Scalar loss: mean over all leading axes of -sum(y * log_softmax(x)).
        """
        # Name the class axis explicitly. The implicit-dim form is deprecated
        # and picks axis 0 for 3-D input, which would disagree with the sum
        # over the last axis below.
        log_probs = F.log_softmax(x, dim=-1)
        return torch.mean(-torch.sum(y * log_probs, -1))

In [None]:
class Net2(nn.Module):
    """Two-layer fully connected classifier using a separate loss module (approach 2).

    Same architecture as the first approach, but the loss is delegated to a
    `myCrossEntropyLoss` instance stored in `self.criterion`.
    """

    def __init__(self):
        super(Net2, self).__init__()

        # 28*28 flattened pixels -> 256 hidden units -> `num_classes` logits.
        self.fc1 = nn.Linear(28 * 28, 256)
        torch.nn.init.xavier_uniform(self.fc1.weight)
        self.fc2 = nn.Linear(256, num_classes)
        torch.nn.init.xavier_uniform(self.fc2.weight)

        # loss & optimizer
        self.criterion = myCrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)

    def forward(self, x):
        """Map a batch of flattened images to unnormalised class scores (logits)."""
        out = F.relu(self.fc1(x))
        out = self.fc2(out)
        return out

    def train_model(self, x, y):
        """Run one optimisation step on the batch (x, y).

        Returns:
            The loss computed for this batch before the parameter update.
        """
        self.train()
        self.optimizer.zero_grad()
        y_pred = self.forward(x)
        loss = self.criterion(y_pred, y)
        loss.backward()
        self.optimizer.step()
        return loss

    def get_accuracy(self, loader=None):
        """Classification accuracy over one full pass of `loader`.

        Args:
            loader: iterable yielding (images, labels) batches. Defaults to
                the notebook-level `test_loader`.

        Returns:
            float in [0, 1]: correctly classified samples divided by the total
            number of samples seen; 0.0 if the loader yields nothing. The
            value is also stored in `self.accuracy`.
        """
        if loader is None:
            loader = test_loader
        self.eval()
        # Follow the model's own placement instead of assuming a GPU exists.
        on_gpu = next(self.parameters()).is_cuda
        correct, total = 0, 0
        # Iterate the loader itself: calling __iter__() inside the loop builds
        # a fresh iterator every step and only ever yields its first batch.
        for batch_images, batch_labels in loader:
            batch_images = Variable(batch_images.view(-1, 28 ** 2))
            batch_labels = Variable(batch_labels)
            if on_gpu:
                batch_images = batch_images.cuda()
                batch_labels = batch_labels.cuda()
            pred = self.forward(batch_images)
            hits = torch.max(pred.data, 1)[1] == batch_labels.data
            # Count samples rather than averaging per-batch means, so a
            # smaller final batch is weighted correctly.
            correct += int(hits.sum())
            total += hits.numel()
        # float() keeps true division under Python 2 as well.
        self.accuracy = correct / float(total) if total else 0.0
        return self.accuracy
net2 = Net2()
# Move the model to the GPU only when one is available; an unconditional
# .cuda() raises on CPU-only machines.
if GPU_FLAG:
    net2.cuda()
print(net2)