## torch.nn

`nn.Module` 是 torch.nn 的核心数据结构：自定义的层和网络都通过继承它来实现。

In [1]:
import torch as t
from torch import nn
from torch.autograd import Variable as V


In [2]:
class Linear(nn.Module):
    """Hand-written fully connected layer computing ``x.mm(w) + b``.

    Subclassing ``nn.Module`` registers the two ``nn.Parameter`` attributes,
    so they are returned by ``named_parameters()``.

    Args:
        in_features: number of columns of the input matrix.
        out_features: number of columns of the output matrix.
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        # nn.Parameter is a special tensor that requires gradients by default.
        weight = t.randn(in_features, out_features)
        bias = t.randn(out_features)
        self.w = nn.Parameter(weight)
        self.b = nn.Parameter(bias)

    def forward(self, x):
        """Apply the affine map to a 2-D input of shape (batch, in_features)."""
        projected = t.mm(x, self.w)
        # Repeat the bias over the batch dimension before adding it.
        bias = self.b.expand_as(projected)
        return projected + bias
    
# Fully connected layer mapping 4 input features to 3 outputs, applied to a
# random batch of 2 samples.
layer = Linear(4, 3)
# Since PyTorch 0.4 plain tensors take part in autograd, so the deprecated
# Variable wrapper is no longer needed.
input = t.randn(2, 4)
output = layer(input)
output

tensor([[ 2.0027, -0.4393,  3.7245],
        [-3.3274, -0.7227,  0.5685]], grad_fn=<ThAddBackward>)

In [4]:
# Print every parameter registered on the layer together with its name.
for param_name, param_tensor in layer.named_parameters():
    print(param_name, param_tensor)

w Parameter containing:
tensor([[-1.0120,  0.0892,  0.1106],
        [ 0.2262,  0.6317,  0.4206],
        [ 0.5001, -0.4838,  1.0983],
        [-1.8336,  0.5101, -1.7111]], requires_grad=True)
b Parameter containing:
tensor([-0.5480, -0.4312,  1.7770], requires_grad=True)


In [5]:
nn.Parameter??    # show the source code of the Parameter class

In [9]:
# Multi-layer perceptron: two fully connected layers with a sigmoid activation
# between them.
class Perceptron(nn.Module):
    """Two-layer perceptron built from the custom ``Linear`` layer.

    Args:
        in_features: width of the input.
        hidden_features: width of the hidden layer.
        out_features: width of the output.
    """

    def __init__(self, in_features, hidden_features, out_features):
        super().__init__()
        # Sub-modules assigned as attributes are registered automatically.
        self.layer1 = Linear(in_features, hidden_features)
        self.layer2 = Linear(hidden_features, out_features)

    def forward(self, x):
        """Run layer1, squash with sigmoid, then run layer2."""
        hidden = t.sigmoid(self.layer1(x))
        return self.layer2(hidden)
    
# 3 input features -> 4 hidden features -> 1 output feature.
perceptron = Perceptron(3, 4, 1)


实际上，PyTorch 已经实现了神经网络中绝大多数常用的 layer，可以直接使用；下面用 `??` 查看其中几个的源码。

In [10]:
nn.Conv2d??

In [11]:
nn.AvgPool2d??

In [12]:
nn.Linear??

In [None]:
nn.BatchNorm2d??

### 激活函数

激活函数（如 nn.ReLU）可以作为独立的 layer 使用。nn.Sequential 是一个特殊的 Module，它包含若干个子 module，前向传播时会把输入依次传给每一个子 module。

In [13]:
# Three ways to build the same nn.Sequential

# 1) Start empty and register named sub-modules one at a time.
net1 = nn.Sequential()
net1.add_module('conv', nn.Conv2d(3, 3, 3))
net1.add_module('batchnorm', nn.BatchNorm2d(3))
net1.add_module('activation_layer', nn.ReLU())  # an activation can be used as a standalone layer

# 2) Pass the modules positionally; they are then named by their index.
net2 = nn.Sequential(
    nn.Conv2d(3, 3, 3),
    nn.BatchNorm2d(3),
    nn.ReLU(),
)

# 3) Pass an OrderedDict of (name, module) pairs.
from collections import OrderedDict
net3 = nn.Sequential(
    OrderedDict(
        [
            ('conv1', nn.Conv2d(3, 3, 3)),
            ('bn1', nn.BatchNorm2d(3)),
            ('relu1', nn.ReLU()),
        ]
    )
)

for label, network in (('net1: ', net1), ('net2: ', net2), ('net3: ', net3)):
    print(label, network)

net1:  Sequential(
  (conv): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
  (batchnorm): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation_layer): ReLU()
)
net2:  Sequential(
  (0): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
  (1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
)
net3:  Sequential(
  (conv1): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
  (bn1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
)


In [14]:
# Sub-modules can be fetched by their registered name (attribute access) or,
# for a positionally built container, by integer index.
net1.conv, net2[1], net3.relu1

(Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1)),
 BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
 ReLU())

### RNN  RNNCell

In [15]:
# Run a single-layer LSTM over a random sequence.
# nn.LSTM is sequence-first by default, so the input is read as
# (seq_len=2, batch=3, input_size=4). Plain tensors are used because the
# Variable wrapper has been a deprecated no-op since PyTorch 0.4.
input = t.randn(2, 3, 4)
lstm = nn.LSTM(input_size=4, hidden_size=3, num_layers=1)
# Initial hidden and cell states, each (num_layers=1, batch=3, hidden_size=3).
h0 = t.randn(1, 3, 3)
c0 = t.randn(1, 3, 3)
# `out` holds the hidden state of every time step; `hn` is the tuple
# (h_n, c_n) with the final hidden and cell states.
out, hn = lstm(input, (h0, c0))
out

tensor([[[-0.5134, -0.2633, -0.0370],
         [-0.3588,  0.2899,  0.1300],
         [-0.1729, -0.1179,  0.0504]],

        [[-0.5915, -0.0094,  0.2564],
         [-0.3588,  0.3240,  0.1656],
         [-0.1866, -0.0089,  0.0474]]], grad_fn=<CatBackward>)

In [16]:
# The same kind of recurrence with nn.LSTMCell, which processes one time step
# per call, so the loop over the sequence is written by hand.
# Plain tensors replace the deprecated Variable wrapper (a no-op since 0.4).
input = t.randn(2, 3, 4)
lstm = nn.LSTMCell(input_size=4, hidden_size=3)
# Initial hidden and cell state for a batch of 3 with hidden size 3.
hx = t.randn(3, 3)
cx = t.randn(3, 3)
out = []
for step_input in input:  # iterates over the first (time) dimension
    hx, cx = lstm(step_input, (hx, cx))
    out.append(hx)
# Stack the per-step hidden states into one (seq_len, batch, hidden) tensor.
t.stack(out)

tensor([[[-0.1346,  0.1886, -0.1895],
         [ 0.1065, -0.2896,  0.2246],
         [-0.1018,  0.1794, -0.0827]],

        [[-0.1695,  0.1479,  0.0184],
         [-0.0086,  0.0190, -0.0218],
         [ 0.0877,  0.2225,  0.1486]]], grad_fn=<StackBackward>)

In [17]:
t.stack??

### 优化器

In [18]:
class Net(nn.Module):
    """Small LeNet-style CNN with 10 outputs for 3-channel 32x32 inputs.

    ``features`` is two conv/ReLU/max-pool stages; for a 32x32 input it yields
    16 maps of 5x5, which is the 16*5*5 input size of ``classifier``.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 6, 5), nn.ReLU(), nn.MaxPool2d(2, 2),
            nn.Conv2d(6, 16, 5), nn.ReLU(), nn.MaxPool2d(2, 2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120), nn.ReLU(),
            nn.Linear(120, 84), nn.ReLU(),
            nn.Linear(84, 10),
        )

    def forward(self, x):
        """Return class scores of shape (batch, 10)."""
        x = self.features(x)
        # Flatten per sample. Keeping the batch dimension fixed (instead of
        # view(-1, 400)) makes a wrongly sized input fail in the first Linear
        # rather than silently merging several samples into one row.
        x = x.view(x.size(0), -1)
        return self.classifier(x)


net = Net()

In [19]:
from torch import optim
optimizer = optim.SGD(params=net.parameters(), lr=1)
optimizer.zero_grad()   # equivalent to net.zero_grad()

# Plain tensors work with autograd since PyTorch 0.4; no Variable wrapper needed.
input = t.randn(1, 3, 32, 32)
output = net(input)
# Fake backward pass: the output itself is used as the upstream gradient,
# only to populate .grad on the parameters for this demo.
output.backward(output)

# Apply one SGD update using the gradients computed above.
optimizer.step()

如需调整学习率，可以新建一个 optimizer；下面的例子还为不同的参数组分别设置了学习率。

In [20]:
# New optimizer with per-group options: the classifier group overrides the
# learning rate, the feature group uses the default lr given at the end.
optimizer = optim.SGD(
    [
        {'params': net.features.parameters()},
        {'params': net.classifier.parameters(), 'lr': 0.01},
    ],
    lr=0.1,
)


### nn.functional

In [22]:
# Plain tensor input; the Variable wrapper is a deprecated no-op since 0.4.
input = t.randn(2, 3)

# Module form: the layer owns its weight and bias.
model = nn.Linear(3, 4)
output1 = model(input)

# Functional form: the same computation with the parameters passed explicitly.
output2 = nn.functional.linear(input, model.weight, model.bias)

# Element-wise comparison of the two results.
output1 == output2

tensor([[1, 1, 1, 1],
        [1, 1, 1, 1]], dtype=torch.uint8)

In [23]:
# ReLU is available both as a function and as a layer; applied to the same
# input, the two forms are compared element-wise below.
b = nn.functional.relu(input)
b2 = nn.ReLU()(input)
b == b2

tensor([[1, 1, 1],
        [1, 1, 1]], dtype=torch.uint8)

### 参数的初始化

nn.init模块实现了常用的初始化策略。

In [25]:
from torch.nn import init
linear = nn.Linear(3, 4)

# Re-initialise the weight with Xavier-normal values; the call returns the
# parameter, which the notebook then displays.
init.xavier_normal_(linear.weight)


Parameter containing:
tensor([[-0.6411, -0.5984,  0.0877],
        [ 0.7913, -0.2122,  0.9752],
        [-0.0548,  0.3535, -0.5248],
        [-0.2230,  0.8966,  0.5706]], requires_grad=True)

In [26]:
nn.Module??

In [27]:
net


Net(
  (features): Sequential(
    (0): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Linear(in_features=400, out_features=120, bias=True)
    (1): ReLU()
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): ReLU()
    (4): Linear(in_features=84, out_features=10, bias=True)
  )
)

In [28]:
# Save the model: only the state_dict is written to disk.
t.save(net.state_dict(), 'net.pth')

# Load the saved state into a freshly constructed model.
net2 = Net()
net2.load_state_dict(t.load('net.pth'))

# To run on the GPU, move both the model and its input there.
# Tensor.cuda() returns a copy instead of moving the tensor in place, so the
# result must be assigned back. The availability check keeps this cell
# runnable on CPU-only machines.
if t.cuda.is_available():
    model = model.cuda()
    input = input.cuda()


### 实现ResNet

In [29]:
from torch import nn
import torch as t
from torch.nn import functional as F


In [30]:
class ResidualBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm stages plus a shortcut.

    Args:
        inchannel: channels of the input.
        outchannel: channels produced by both convolutions.
        stride: stride of the first convolution.
        shortcut: optional module applied to the input before the addition;
            when ``None`` the input itself is added (identity shortcut).
    """

    def __init__(self, inchannel, outchannel, stride=1, shortcut=None):
        super().__init__()
        main_path = [
            nn.Conv2d(inchannel, outchannel, 3, stride, 1, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(outchannel, outchannel, 3, 1, 1, bias=False),
            nn.BatchNorm2d(outchannel),
        ]
        self.left = nn.Sequential(*main_path)
        self.right = shortcut

    def forward(self, x):
        """Return ``relu(left(x) + shortcut(x))``."""
        out = self.left(x)
        if self.right is None:
            residual = x
        else:
            residual = self.right(x)
        # In-place add onto the output of the main path.
        out += residual
        return F.relu(out)
    
class ResNet(nn.Module):
    """Residual network made of a stem, four residual stages and a classifier.

    The stages contain 3, 4, 6 and 3 ``ResidualBlock`` modules respectively.

    Args:
        num_classes: size of the output of the final linear layer.
    """

    def __init__(self, num_classes=1000):
        super(ResNet, self).__init__()
        # Stem: 7x7 stride-2 convolution followed by a 3x3 stride-2 max pool.
        self.pre = nn.Sequential(
            nn.Conv2d(3, 64, 7, 2, 3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, 2, 1),
        )
        self.layer1 = self._make_layer(64, 128, 3)
        self.layer2 = self._make_layer(128, 256, 4, stride=2)
        self.layer3 = self._make_layer(256, 512, 6, stride=2)
        self.layer4 = self._make_layer(512, 512, 3, stride=2)

        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, inchannel, outchannel, block_num, stride=1):
        """Build one stage of ``block_num`` residual blocks.

        The first block changes the channel count (and applies ``stride``), so
        it gets a 1x1 conv + batch-norm shortcut; the remaining blocks keep the
        shape and use the identity shortcut.
        """
        shortcut = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, 1, stride, bias=False),
            nn.BatchNorm2d(outchannel),
        )
        layers = [ResidualBlock(inchannel, outchannel, stride, shortcut)]
        layers.extend(
            ResidualBlock(outchannel, outchannel) for _ in range(1, block_num)
        )
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes)."""
        x = self.pre(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        # Global average pooling. For a 224x224 input the feature map is 7x7,
        # so this equals the former avg_pool2d(x, 7); the adaptive form also
        # accepts other input resolutions.
        x = F.adaptive_avg_pool2d(x, 1)
        x = x.view(x.size(0), -1)
        return self.fc(x)

In [32]:
model = ResNet()
# A plain tensor is enough: the Variable wrapper has been a deprecated no-op
# since PyTorch 0.4. One RGB image of 224x224 pixels.
input = t.randn(1, 3, 224, 224)
out = model(input)
out

tensor([[ 0.1894, -0.3569,  0.3082,  0.4709, -0.1184,  0.5463,  0.1248, -0.2602,
          0.3080, -0.4432,  0.0798, -0.3327,  0.2541, -0.4049, -0.0502,  0.1631,
         -0.0280, -0.0694,  0.1476, -0.4899, -0.3459,  0.0744, -0.5930,  0.5776,
          0.1642,  0.5396,  0.0666, -0.1787, -0.1806, -0.2015,  0.0343,  0.1598,
         -0.1636, -0.4603, -0.1124,  0.1034,  0.5849,  0.0224, -0.0573,  0.0337,
          0.3540, -0.7250, -0.0857, -0.1482, -0.3968,  0.5321, -0.4246,  0.2845,
         -0.4286,  0.3081,  0.7063,  0.4696,  0.0683,  0.0688, -0.5384, -0.0954,
          0.0485, -0.0693, -0.1764,  0.1659,  0.2950, -0.6973,  0.4064,  0.7254,
          0.0145,  0.5843, -0.0018, -0.0244,  0.6111, -0.2469, -0.6280, -0.2148,
         -0.5064,  0.0384,  0.4206, -0.4653,  0.0713, -0.1483,  0.0898, -0.2034,
          0.4141, -0.0493, -0.1261,  0.8232, -0.0769,  0.2669, -0.0972,  0.0413,
          0.0911,  0.1049, -0.1080,  0.2174,  0.2858, -0.0021, -0.1397,  0.3337,
          0.1673,  0.2258,  