# Pytorch - Self-Define自定义



## 自定义loss函数


方式1：定义为一个类<br>
``
class TF_Sparse_Softmax_Cross_Entropy_Loss(nn.Module):
    """Softmax cross-entropy computed from raw logits and per-class target weights."""

    def __init__(self):
        super(TF_Sparse_Softmax_Cross_Entropy_Loss, self).__init__()

    def forward(self, input, target):
        """Return the mean over the batch of ``-sum(log_softmax(input) * target)``.

        ``target`` is multiplied element-wise with the log-probabilities along
        the last dimension (e.g. one-hot rows select the true-class term).
        """
        log_probs = F.log_softmax(input, dim=-1)
        per_sample = (log_probs * target).sum(dim=-1)
        return -per_sample.mean()
``
<br>
方式2：作为网络的一部分（在网络类中定义计算loss的方法，例如下文完整代码中的 `Net.get_loss`）<br>
``
class Dense(nn.Module):
    def forward(self, input):
        """Apply the affine map ``input @ weight.T + bias``.

        Reads ``self.weight`` and ``self.bias``, which this snippet does not
        define itself.
        """
        projected = input.matmul(self.weight.t())
        return projected + self.bias
``

## 自定义layer
以全连接网络为例。
定义权重矩阵时，其形状按逆序写为 `(out_features, in_features)`（与 `nn.Linear` 的约定一致）。矩阵需要用 `Parameter` 包装，才能被注册为网络的可训练参数；前向计算时通过转置 `weight.t()` 来使用。

``
class Dense(nn.Module):
    """Fully connected layer computing ``input @ weight.T + bias``.

    Both parameters start at zero. ``weight`` is stored with shape
    ``(out_features, in_features)`` and is transposed in ``forward``.
    """

    def __init__(self, in_features, out_features):
        super(Dense, self).__init__()
        self.in_features, self.out_features = in_features, out_features
        self.weight = Parameter(torch.zeros((out_features, in_features)))
        self.bias = Parameter(torch.zeros((out_features,)))

    def forward(self, input):
        """Return the affine projection of ``input`` onto ``out_features`` outputs."""
        projected = input.matmul(self.weight.t())
        return projected + self.bias
``


## 自定义 optimizer
Pytorch中自定义优化器比较复杂，在此不多赘述。感兴趣的读者可参考源码，仿写。

类似的，Pytorch也能像TensorFlow一样改变梯度。
### 手动完成梯度传播
``optimizer.step()``（以不带动量的 SGD 为例）等价于<br>
``
# Optionally rescale the gradients once, before the update. clip_grad_norm_
# needs the parameters and a max_norm; the value used here is only an example.
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
for f in model.parameters():
    # Plain gradient-descent step: p <- p - learning_rate * grad
    f.data.sub_(f.grad.data * learning_rate)
``

In [1]:
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from six.moves import xrange

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.utils.data as Data
import torch.nn.functional as F
import torchvision

# Hyper-parameters
training_epoch = 100
num_classes = 10
learning_rate = 1e-1
batch_size = 1000

# MNIST digits: the training split first, then the test split. Both are stored
# under data/ (downloaded when missing) and converted to tensors on access.
train_data, test_data = (
    torchvision.datasets.MNIST(
        root='data/',
        train=is_train,
        transform=torchvision.transforms.ToTensor(),
        download=True,
    )
    for is_train in (True, False)
)

# Shuffled mini-batch loaders over each split.
train_loader, test_loader = (
    Data.DataLoader(dataset=split, batch_size=batch_size, shuffle=True)
    for split in (train_data, test_data)
)

from torch.nn.parameter import Parameter


class Dense(nn.Module):
    """Fully connected layer computing ``input @ weight.T + bias``.

    Both parameters start at zero. ``weight`` is stored with shape
    ``(out_features, in_features)`` and is transposed in ``forward``.
    """

    def __init__(self, in_features, out_features):
        super(Dense, self).__init__()
        self.in_features, self.out_features = in_features, out_features

        # Wrapping in Parameter registers the tensors as trainable parameters.
        self.weight = Parameter(torch.zeros((out_features, in_features)))
        self.bias = Parameter(torch.zeros((out_features,)))

    def forward(self, input):
        """Return the affine projection of ``input`` onto ``out_features`` outputs."""
        projected = input.matmul(self.weight.t())
        return projected + self.bias


# 创建网络
class Net(nn.Module):
    """Classifier for flattened 28x28 inputs: Linear(784, 256) -> ReLU -> Dense(256, num_classes)."""

    def __init__(self):
        super(Net, self).__init__()

        # The hidden layer is the built-in linear layer; the output layer is the
        # hand-written Dense layer (nn.Linear(256, num_classes) is the built-in
        # alternative).
        self.fc1 = nn.Linear(in_features=28 * 28, out_features=256)
        self.fc2 = Dense(in_features=256, out_features=num_classes)

    def forward(self, x):
        """Return the unnormalised class scores (logits) for ``x``."""
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)

    def get_loss(self, input, target):
        """Return the batch mean of ``-sum(log_softmax(input) * target)`` over the last dim."""
        log_probs = F.log_softmax(input, dim=-1)
        per_sample = (log_probs * target).sum(dim=-1)
        return -per_sample.mean()


class TF_Sparse_Softmax_Cross_Entropy_Loss(nn.Module):
    """Softmax cross-entropy computed from raw logits and per-class target weights."""

    def __init__(self):
        super(TF_Sparse_Softmax_Cross_Entropy_Loss, self).__init__()

    def forward(self, input, target):
        """Return the mean over the batch of ``-sum(log_softmax(input) * target)``.

        ``target`` is multiplied element-wise with the log-probabilities along
        the last dimension (e.g. one-hot rows select the true-class term).
        """
        log_probs = F.log_softmax(input, dim=-1)
        per_sample = (log_probs * target).sum(dim=-1)
        return -per_sample.mean()


# Check whether this PyTorch build can see a CUDA device.
GPU_FLAG = torch.cuda.is_available()
print('CUDA available?', GPU_FLAG)

# Always build the model and the loss module, so that `model` and `criterion`
# exist on CPU-only machines too (previously they were only assigned inside
# the GPU branch and print(model) raised NameError without CUDA).
model = Net()
# Custom loss module; nn.CrossEntropyLoss() would be the built-in choice.
criterion = TF_Sparse_Softmax_Cross_Entropy_Loss()
if GPU_FLAG:
    # Move the model parameters (and the loss module) to the GPU.
    model = model.cuda()
    criterion = criterion.cuda()
print(model)  # print the model structure
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


# one_hot_encoding
def one_hot(x, num_classes=10):
    """Convert integer class labels into a float one-hot matrix.

    Args:
        x: Class indices in ``[0, num_classes)``. May be a tensor of any
            integer dtype or anything ``torch.as_tensor`` accepts (e.g. a
            list); it is flattened to one label per row.
        num_classes: Width of each one-hot row.

    Returns:
        A ``(N, num_classes)`` tensor of the default float dtype with a single
        1 per row, allocated on the same device as the label tensor.
    """
    # as_tensor converts lists and non-int64 tensors alike; the legacy
    # torch.LongTensor(x) constructor rejects tensors of any other dtype.
    indices = torch.as_tensor(x, dtype=torch.long).reshape(-1, 1)
    encoded = torch.zeros(indices.size(0), num_classes, device=indices.device)
    return encoded.scatter_(1, indices, 1)


# Run on whatever device the model's parameters live on, so the loop also
# works on CPU-only machines instead of failing on an unconditional .cuda().
device = next(model.parameters()).device

for epoch in xrange(10):
    for batch_images, batch_labels in train_loader:
        # Flatten each 28x28 image into a 784-element row.
        batch_images = batch_images.view(-1, 28 ** 2).to(device)
        batch_labels_one_hot = one_hot(batch_labels).to(device)
        batch_labels = batch_labels.to(device)

        # forward
        batch_logits = model(batch_images)
        # compute loss (criterion(batch_logits, batch_labels_one_hot) evaluates
        # the same expression through the standalone loss module)
        loss = model.get_loss(batch_logits, batch_labels_one_hot)
        # clear the gradients accumulated by the previous step
        optimizer.zero_grad()
        # compute gradients
        loss.backward()

        # Manual parameter update in place of optimizer.step(): a plain
        # gradient-descent step p <- p - learning_rate * grad. no_grad keeps
        # the in-place update out of the autograd graph.
        with torch.no_grad():
            for param in model.parameters():
                param.sub_(param.grad * learning_rate)

        accuracy = (torch.argmax(batch_logits, dim=-1) == batch_labels).float().mean()
        print('loss: %-.5f, acc: %-.5f' % (float(loss), float(accuracy)))


CUDA available? True
Net(
  (fc1): Linear(in_features=784, out_features=256, bias=True)
  (fc2): Dense()
)
loss: 2.30258, acc: 0.09700
loss: 2.29881, acc: 0.28000
loss: 2.29455, acc: 0.46900
loss: 2.29054, acc: 0.51700
loss: 2.28744, acc: 0.39200
loss: 2.28173, acc: 0.57400
loss: 2.27772, acc: 0.52000
loss: 2.27353, acc: 0.44700
loss: 2.26831, acc: 0.49300
loss: 2.26301, acc: 0.44300
loss: 2.25822, acc: 0.43300
loss: 2.25360, acc: 0.48400
loss: 2.25045, acc: 0.50500
loss: 2.24293, acc: 0.58500
loss: 2.23779, acc: 0.59300
loss: 2.22530, acc: 0.62600
loss: 2.22009, acc: 0.58400
loss: 2.21340, acc: 0.62000
loss: 2.20444, acc: 0.63200
loss: 2.19184, acc: 0.63900
loss: 2.19070, acc: 0.61100
loss: 2.17900, acc: 0.58100
loss: 2.16739, acc: 0.59500
loss: 2.15500, acc: 0.63600
loss: 2.13929, acc: 0.65100
loss: 2.12724, acc: 0.65300
loss: 2.11739, acc: 0.62300
loss: 2.09119, acc: 0.65500
loss: 2.08123, acc: 0.63800
loss: 2.07224, acc: 0.61700
loss: 2.05157, acc: 0.65500
loss: 2.01760, acc: 0.663

loss: 0.37768, acc: 0.90100
loss: 0.37709, acc: 0.90700
loss: 0.38232, acc: 0.89300
loss: 0.38241, acc: 0.88600
loss: 0.41691, acc: 0.89500
loss: 0.36571, acc: 0.89200
loss: 0.37619, acc: 0.90800
loss: 0.36589, acc: 0.89300
loss: 0.38841, acc: 0.89300
loss: 0.41773, acc: 0.88500
loss: 0.42625, acc: 0.88100
loss: 0.39270, acc: 0.90400
loss: 0.38448, acc: 0.89000
loss: 0.39911, acc: 0.89700
loss: 0.40198, acc: 0.88500
loss: 0.35688, acc: 0.89200
loss: 0.38786, acc: 0.89100
loss: 0.38931, acc: 0.90100
loss: 0.36096, acc: 0.91100
loss: 0.40215, acc: 0.87900
loss: 0.34246, acc: 0.90300
loss: 0.41366, acc: 0.89100
loss: 0.36434, acc: 0.89200
loss: 0.38404, acc: 0.89500
loss: 0.41210, acc: 0.88200
loss: 0.33576, acc: 0.91800
loss: 0.33685, acc: 0.90700
loss: 0.37144, acc: 0.89900
loss: 0.34255, acc: 0.90400
loss: 0.37646, acc: 0.89400
loss: 0.39190, acc: 0.88400
loss: 0.39348, acc: 0.88100
loss: 0.38304, acc: 0.90500
loss: 0.35244, acc: 0.89800
loss: 0.39884, acc: 0.88000
loss: 0.41543, acc: 

loss: 0.30637, acc: 0.90400
loss: 0.29963, acc: 0.91500
loss: 0.26317, acc: 0.91400
loss: 0.28652, acc: 0.91000
loss: 0.32813, acc: 0.90800
loss: 0.31855, acc: 0.90500
loss: 0.26804, acc: 0.91900
loss: 0.31331, acc: 0.91000
loss: 0.30934, acc: 0.90900
loss: 0.33890, acc: 0.91300
loss: 0.34850, acc: 0.89800
loss: 0.28059, acc: 0.90500
loss: 0.30741, acc: 0.91200
loss: 0.31008, acc: 0.91600
loss: 0.33360, acc: 0.90700
loss: 0.32155, acc: 0.91100
