In [1]:
import torch
import torch.nn as nn  

# Small MLP: 4 input features -> 3 hidden units (ReLU) -> 1 output.
net = nn.Sequential(
    nn.Linear(4, 3),
    nn.ReLU(),
    nn.Linear(3, 1),
)

print(net)

# Push a random batch of 2 samples through the network and reduce the
# outputs to a single scalar.
x = torch.rand((2, 4))
y = torch.sum(net(x))

Sequential(
  (0): Linear(in_features=4, out_features=3, bias=True)
  (1): ReLU()
  (2): Linear(in_features=3, out_features=1, bias=True)
)


## 访问模型参数

可以使用`Module`类的`parameters()`和`named_parameters()`方法来访问模型的所有参数（两者都以迭代器的形式返回）。其中`named_parameters()`除了返回参数本身之外，还会同时返回参数的名称。

In [2]:
# Show what kind of object named_parameters() returns.
param_iter = net.named_parameters()
print(type(param_iter))

# Iterate a fresh call to list every parameter's name and shape.
for name, param in net.named_parameters():
    print(name, param.shape)

<class 'generator'>
0.weight torch.Size([3, 4])
0.bias torch.Size([3])
2.weight torch.Size([1, 3])
2.bias torch.Size([1])


从上面可知，`named_parameters()`返回的是迭代器（生成器）。使用`Sequential`构建的网络，其参数名以层的编号作为前缀（例如`0.weight`）；还可以使用`[]`来访问网络的任一层。

In [3]:
# Index into the Sequential to get its first layer, then list that layer's
# own parameters together with their Python type.
first_layer = net[0]
for name, param in first_layer.named_parameters():
    print(name, param.shape, type(param))

weight torch.Size([3, 4]) <class 'torch.nn.parameter.Parameter'>
bias torch.Size([3]) <class 'torch.nn.parameter.Parameter'>


`param`的类型为`torch.nn.parameter.Parameter`，它是`Tensor`的子类。与普通`Tensor`不同的是：当一个`Parameter`被赋值为`Module`的属性时，它会自动地被添加到该模型的参数列表里，而普通的`Tensor`属性则不会（见下面的例子）。

`Parameter`拥有`Tensor`的所有属性：可以使用`data`来访问参数的数值，使用`grad`来访问梯度（在调用`backward()`之前，`grad`为`None`）。

In [4]:
class MyModel(nn.Module):
    """Toy module holding one ``nn.Parameter`` attribute and one plain tensor attribute."""

    def __init__(self, **kwargs):
        """Create the two 20x20 random attributes; ``kwargs`` go to ``nn.Module.__init__``."""
        super().__init__(**kwargs)
        # Wrapped in nn.Parameter.
        self.weight1 = nn.Parameter(torch.rand(20, 20))
        # Plain tensor, deliberately NOT wrapped in nn.Parameter.
        self.weight2 = torch.rand(20, 20)

    def forward(self, x):
        """Placeholder forward pass: ignores ``x`` and returns ``None``."""
        return None

# Instantiate the toy model and print the name of every registered parameter.
n = MyModel()
registered = n.named_parameters()
for name, param in registered:
    print(name)

weight1


In [5]:
# Take the first parameter yielded by the first layer.
weight_0 = next(net[0].parameters())

# .data exposes the raw values; .grad exposes the gradient.
print(weight_0.data)
print(weight_0.grad)


tensor([[-0.3461,  0.3694,  0.1778, -0.3958],
        [-0.0034, -0.2368,  0.2241, -0.2926],
        [ 0.3248, -0.2238, -0.2350,  0.2400]])
None


In [7]:
# Use a different built-in initializer for weights and for biases:
# weights are drawn from a normal distribution (mean 0, std 0.01),
# biases are set to the constant 0.
for name, param in net.named_parameters():
    if 'weight' in name:
        nn.init.normal_(param, mean=0, std=0.01)
    elif 'bias' in name:
        nn.init.constant_(param, val=0)
    else:
        # Neither a weight nor a bias: leave it alone and print nothing.
        continue
    print(name, param.data)

0.weight tensor([[ 0.0031, -0.0003, -0.0184,  0.0004],
        [ 0.0011, -0.0036,  0.0086,  0.0271],
        [-0.0099, -0.0080,  0.0083,  0.0156]])
0.bias tensor([0., 0., 0.])
2.weight tensor([[-0.0035,  0.0088, -0.0207]])
2.bias tensor([0.])


In [9]:
# 自定义初始化方法 

def normal_(tensor, mean=0, std=0.01):
    """Overwrite ``tensor`` in place with normally distributed samples.

    Args:
        tensor: Tensor (or Parameter) whose values are replaced.
        mean: Mean of the normal distribution.
        std: Standard deviation of the normal distribution.

    Returns:
        The same ``tensor`` object, after it has been filled.
    """
    # Gradient tracking is switched off while the values are overwritten.
    with torch.no_grad():
        tensor.normal_(mean, std)
    return tensor

def init_weight(tensor):
    """Custom in-place initializer: uniform in [-10, 10], small entries zeroed.

    Every element is first drawn uniformly from [-10, 10]; elements whose
    absolute value is below 5 are then multiplied by 0, the rest by 1.

    Args:
        tensor: Tensor (or Parameter) to overwrite in place.
    """
    # Gradient tracking is switched off while the values are overwritten.
    with torch.no_grad():
        tensor.uniform_(-10, 10)
        # 1.0 where |value| >= 5, 0.0 elsewhere.
        keep = tensor.abs().ge(5).float()
        tensor.mul_(keep)

# Apply the custom initializer to every parameter whose name contains
# 'weight'; other parameters (e.g. biases) are left untouched and not printed.
# The filter previously read 'weigh', a typo that only matched because it is
# a substring of 'weight'.
for name, param in net.named_parameters():
    if 'weight' in name:
        init_weight(param)
        print(name, param)

0.weight Parameter containing:
tensor([[ 0.0000, -0.0000,  5.9529, -0.0000],
        [-9.6475, -0.0000, -0.0000,  7.9089],
        [-9.5902, -7.5929,  0.0000, -5.8976]], requires_grad=True)
2.weight Parameter containing:
tensor([[ 9.7117, -6.2379, -7.2058]], requires_grad=True)


## 共享模型参数
在多个层之间共享模型参数，有两种方法：
- `forward`函数多次调用同一个层
- 传入`Sequential`的模块是同一个`Module`实例的话，参数也是共享的

In [10]:
# One bias-free 1x1 linear layer placed in both slots of the Sequential:
# the two slots refer to the same module object.
linear = nn.Linear(1, 1, bias=False)
net = nn.Sequential(linear, linear)
print(net)

# Set every parameter reported by the network to the constant 3.
for name, param in net.named_parameters():
    nn.init.constant_(param, 3)
    print(name, param.data)

# Forward a single 1 through both (shared) layers.
x = torch.ones((1, 1))
y = net(x)
print(y)

# With a shared weight w, y = w * (w * x), so dy/dw = 2 * w * x = 6 for
# w = 3, x = 1: the gradient contributions of both uses are summed.
y.backward()
print(linear.weight.grad)

Sequential(
  (0): Linear(in_features=1, out_features=1, bias=False)
  (1): Linear(in_features=1, out_features=1, bias=False)
)
0.weight tensor([[3.]])
tensor([[9.]], grad_fn=<MmBackward>)
tensor([[6.]])


## GPU计算

In [14]:
import torch
import torch.nn as nn 

# Report whether CUDA is usable and how many devices are visible.
print(torch.cuda.is_available())
print(torch.cuda.device_count())

# The per-device queries below initialize CUDA and raise when no usable
# device is present, so only run them when CUDA is actually available.
if torch.cuda.is_available():
    print(torch.cuda.current_device())          # index of the selected device
    print(torch.cuda.get_device_name(0))        # name of device 0
    print(torch.cuda.get_device_capability(0))  # (major, minor) compute capability of device 0

True
1
0
GeForce GTX 1060 6GB
(6, 1)


In [16]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Option 1: create the tensor directly on the target device.
x = torch.tensor([1, 2, 3], device=device)

# Option 2: create the tensor on the CPU, then move it with .to().
y = torch.rand(3, 4).to(device)

print(x)
print(y)

tensor([1, 2, 3], device='cuda:0')
tensor([[0.6596, 0.8143, 0.7888, 0.4173],
        [0.0165, 0.9277, 0.0844, 0.9370],
        [0.6718, 0.2953, 0.8057, 0.5329]], device='cuda:0')


In [18]:
# Build a fresh linear layer and inspect which device its weight lives on.
net = nn.Linear(3, 1)
net.weight.device

device(type='cpu')

In [19]:
# Module.cuda() moves the module's parameters to the GPU in place, so no
# reassignment of `net` is needed. It raises when no CUDA device is usable,
# hence the guard: on a CPU-only machine the module simply stays on the CPU.
if torch.cuda.is_available():
    net.cuda()
next(net.parameters()).device

device(type='cuda', index=0)

In [21]:
z = torch.rand(4, 5)

# Tensor.cuda() is NOT in-place: it returns a GPU copy. The result is
# discarded here on purpose, so `z` itself is unchanged.
# The calls are guarded because .cuda() raises when no CUDA device is usable.
if torch.cuda.is_available():
    z.cuda()
print(z)

# Rebinding `z` to the returned copy is what actually moves it to the GPU.
if torch.cuda.is_available():
    z = z.cuda()
print(z)

tensor([[0.9744, 0.5742, 0.1584, 0.6088, 0.7109],
        [0.7951, 0.6606, 0.1587, 0.3705, 0.0154],
        [0.4641, 0.8261, 0.0737, 0.6368, 0.7628],
        [0.9285, 0.8652, 0.8189, 0.4539, 0.2267]])
tensor([[0.9744, 0.5742, 0.1584, 0.6088, 0.7109],
        [0.7951, 0.6606, 0.1587, 0.3705, 0.0154],
        [0.4641, 0.8261, 0.0737, 0.6368, 0.7628],
        [0.9285, 0.8652, 0.8189, 0.4539, 0.2267]], device='cuda:0')
