In [7]:
import torch
from torch import nn
from torch.nn import functional as F

net = nn.Sequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
X = torch.rand(2, 20)
net(X)

tensor([[-0.1082, -0.0844, -0.1760, -0.0311,  0.1024,  0.2020, -0.3884, -0.0538,
          0.0557, -0.0283],
        [ 0.0370,  0.0273, -0.0174, -0.0252,  0.1751,  0.3728, -0.3794, -0.2183,
          0.0980,  0.1319]], grad_fn=<AddmmBackward0>)

In [10]:
#自定义块
class MLP(nn.Module):
    #用模型参数声明层，这里声明两个全连接层
    def __init__(self):
        super().__init__()
        self.hidden=nn.Linear(20,256)#隐藏层
        self.out=nn.Linear(256,10)#输出层
    #定义模型的前向传播
    def forward(self,X):
        return self.out(F.relu(self.hidden(X)))

In [12]:
net=MLP()
net(X)

tensor([[ 0.3832, -0.2268,  0.0445,  0.1316,  0.2258,  0.0087,  0.1151,  0.1175,
         -0.0898,  0.0556],
        [ 0.1866, -0.0287, -0.0787, -0.0696,  0.2536,  0.0255,  0.0338,  0.1507,
         -0.0139,  0.2231]], grad_fn=<AddmmBackward0>)

In [None]:
#顺序块
class MySequential(nn.Module):
    def __init__(self, *args):
        super().__init__()
        for idx, module in enumerate(args):
            # 这里，module是Module子类的一个实例。我们把它保存在'Module'类的成员
            # 变量_modules中。_module的类型是OrderedDict
            self._modules[str(idx)] = module
    def forward(self, X):
        #OrderedDict保证了按照成员添加的顺序遍历它们
        for block in self._modules.values():
            X=block(X)
        return X
#_modules的主要优点是：在模块的参数初始化过程中，系统知道在_modules字典中查找需要初始化参数的子块。

In [14]:
net=MySequential(nn.Linear(20,256),nn.ReLU(),nn.Linear(256,10))
net(X)

tensor([[-0.2541,  0.0225,  0.0455,  0.2188,  0.1505,  0.0261,  0.0524, -0.1699,
         -0.1394, -0.1412],
        [-0.4482, -0.0255,  0.1064,  0.3339,  0.3033,  0.0147, -0.0299, -0.0396,
         -0.2393, -0.0879]], grad_fn=<AddmmBackward0>)

In [None]:
#5.3参数管理
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
X = torch.rand(size=(2, 4))
net(X)
#1.参数访问
print(net[2].state_dict())
#一次性访问所有参数
print(*[(name, param.shape) for name, param in net[0].named_parameters()])

OrderedDict([('weight', tensor([[-0.0792, -0.2713,  0.0559,  0.1559,  0.1011,  0.1300, -0.0889,  0.0149]])), ('bias', tensor([-0.1891]))])
('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))


(tensor([ 0.0095,  0.0084, -0.0018, -0.0092]), tensor(0.))

In [None]:
#2.参数初始化
#内置初始化
def init_normal(m):
    if type(m)==nn.Linear:
        nn.init.normal_(m.weight,mean=0,std=0.01)
        nn.init.zeros_(m.bias)
net.apply(init_normal)
net[0].weight.data[0],net[0].bias.data[0]
#自定义初始化

(tensor([ 0.0166,  0.0009, -0.0033,  0.0089]), tensor(0.))

In [23]:
#5.4自定义层
#不带参数的简单层
class CenteredLayer(nn.Module):
    def __init__(self):
        super().__init__()
    def forward(self, X):
        return X - X.mean()

layer=CenteredLayer()
net=nn.Sequential(nn.Linear(8,128),CenteredLayer())
layer(torch.FloatTensor([1,2,3,4,5]))

tensor([-2., -1.,  0.,  1.,  2.])

In [24]:
#带参数的层
class MyLinear(nn.Module):
    def __init__(self,in_units,units):
        super().__init__()
        self.weight=nn.Parameter(torch.randn(in_units,units))
        self.bias=nn.Parameter(torch.randn(units,))
    def forward(self,X):
        linear=torch.matmul(X,self.weight.data)+self.bias.data
        return F.relu(linear)
#实例化MyLinear类并访问其模型参数
linear=MyLinear(5,3)
linear.weight

Parameter containing:
tensor([[-2.6321e-01,  4.6640e-01,  1.5299e+00],
        [-1.2598e+00, -2.1345e+00, -9.0781e-01],
        [ 1.8921e+00,  1.7823e-01, -3.4245e-01],
        [ 5.2429e-01,  2.0576e-01,  4.7465e-06],
        [-1.4339e+00,  1.6247e+00, -1.3870e+00]], requires_grad=True)

In [27]:
#执行前向传播
linear(torch.rand(2,5))

tensor([[1.5094, 0.1120, 0.0000],
        [0.4024, 0.3492, 0.0000]])

In [28]:
#使用自定义层构建模型
net=nn.Sequential(MyLinear(64,8),MyLinear(8,1))
net(torch.rand(2,64))

tensor([[10.9772],
        [ 0.0000]])

In [29]:
#5.5读写文件
#1.加载和保存张量
x=torch.arange(4)
torch.save(x,'x-file')
x2=torch.load('x-file')
x2

  x2=torch.load('x-file')


tensor([0, 1, 2, 3])

In [31]:
#加载和保存模型参数
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.output = nn.Linear(256, 10)
    def forward(self, x):
        return self.output(F.relu(self.hidden(x)))

net=MLP()
X=torch.randn(size=(2,20))
Y=net(X)
torch.save(net.state_dict(),'mlp.params')
clone = MLP()
clone.load_state_dict(torch.load('mlp.params'))
clone.eval()

  clone.load_state_dict(torch.load('mlp.params'))


MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (output): Linear(in_features=256, out_features=10, bias=True)
)

In [32]:
#验证
Y_clone = clone(X)
Y_clone == Y

tensor([[True, True, True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True]])

In [33]:
#5.6 GPU
import torch
from torch import nn

torch.device('cpu'),torch.device('cuda'),torch.device('cuda:1')

(device(type='cpu'), device(type='cuda'), device(type='cuda', index=1))

In [35]:
#查询可用gpu数量
torch.cuda.device_count()

1

In [None]:
#查询张量所在设备
x=torch.tensor([1,2,3])
x.device

device(type='cpu')

In [38]:
def try_gpu(i=0): #@save
    """如果存在，则返回gpu(i)，否则返回cpu()"""
    if torch.cuda.device_count() >= i + 1:
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')

In [39]:
#存储在GPU上
X=torch.ones(2,3,device=try_gpu())
X

tensor([[1., 1., 1.],
        [1., 1., 1.]], device='cuda:0')

In [40]:
#神经网络与GPU
net=nn.Sequential(nn.Linear(3,1))
net=net.to(device=try_gpu())