In [4]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

## `nn.Module`
* 必须有`__init__`和`forward()`方法
* 其他方法：
    * `add_module(name, module)`
    * `apply(fn)`: 对每个submodule应用fn函数，一般用在模型参数初始化
    * `bfloat16()`: 把所有浮点类型的参数和buffer转换为`bfloat16`，整数类型保持不变
    * `buffers()`: 返回模块中所有buffer的迭代器。buffer是不属于可学习参数、但属于模块状态的张量（例如BatchNorm的`running_mean`），默认会随`state_dict`一起保存和载入
    * `cpu(), cuda()`: parameters, buffers移动到相应设备上
    * `eval()`: 跟train模式主要在Dropout和BatchNorm上有所不同
    * `get_parameter(string), get_submodule(str)`
    * `load_state_dict(state_dict, strict=True)`: 载入参数和buffer
    * `named_parameters()`
    * `requires_grad_(bool)`: 设置模块的所有参数是否需要由autograd记录梯度；设为`False`可以冻结参数，使其不参与梯度更新（常用于微调）


In [6]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, then Linear-ReLU-Linear-ReLU-Linear.

    Args:
        in_features: Number of values per sample once flattened
            (default ``28 * 28``).
        hidden_features: Width of both hidden layers (default ``512``).
        num_classes: Number of output logits (default ``10``).
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        # Declare the layers/modules this network uses; their parameters are
        # created here.
        super().__init__()
        # nn.Flatten defaults to start_dim=1, end_dim=-1, so dim 0 (the batch
        # dimension) is left untouched.
        self.flatten = nn.Flatten()
        # nn.Sequential runs the given layers in the order they are passed.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features), nn.ReLU(),
            nn.Linear(hidden_features, hidden_features), nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Flatten ``x`` per sample and return the unnormalized class logits."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [8]:
# Build the network, then move it onto `device` (GPU memory when device is 'cuda').
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [9]:
# Push one random (1, 28, 28) input through the model and pick the most probable class.
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
softmax = nn.Softmax(dim=1)  # normalize across dim 1 of the logits
pred_probab = softmax(logits)
y_pred = pred_probab.argmax(dim=1)
y_pred

tensor([3], device='cuda:0')

In [11]:
# A standalone nn.Flatten keeps dim 0 and merges the remaining dims of X.
flatten = nn.Flatten()
flatten(X).shape

torch.Size([1, 784])

In [16]:
# List each parameter's qualified name, its shape, and a preview of its first two entries.
for name, param in model.named_parameters():
    preview = param[:2]
    print(name, param.shape, preview)

linear_relu_stack.0.weight torch.Size([512, 784]) tensor([[ 0.0038,  0.0113, -0.0304,  ..., -0.0030,  0.0247,  0.0317],
        [-0.0342, -0.0272,  0.0207,  ..., -0.0004, -0.0086,  0.0351]],
       device='cuda:0', grad_fn=<SliceBackward0>)
linear_relu_stack.0.bias torch.Size([512]) tensor([0.0234, 0.0142], device='cuda:0', grad_fn=<SliceBackward0>)
linear_relu_stack.2.weight torch.Size([512, 512]) tensor([[-0.0423,  0.0232, -0.0307,  ...,  0.0003,  0.0152,  0.0198],
        [-0.0026,  0.0210, -0.0260,  ...,  0.0045, -0.0164,  0.0418]],
       device='cuda:0', grad_fn=<SliceBackward0>)
linear_relu_stack.2.bias torch.Size([512]) tensor([0.0343, 0.0355], device='cuda:0', grad_fn=<SliceBackward0>)
linear_relu_stack.4.weight torch.Size([10, 512]) tensor([[-0.0063, -0.0306, -0.0329,  ..., -0.0324, -0.0285,  0.0027],
        [-0.0033,  0.0176, -0.0400,  ..., -0.0416, -0.0363,  0.0387]],
       device='cuda:0', grad_fn=<SliceBackward0>)
linear_relu_stack.4.bias torch.Size([10]) tensor([-0.016

In [17]:
# Loop variables outlive the loop: `param` is still bound to the last
# parameter yielded by model.named_parameters() in the preceding cell.
param

Parameter containing:
tensor([-0.0169, -0.0008, -0.0041,  0.0251, -0.0143,  0.0183, -0.0019, -0.0154,
         0.0146,  0.0401], device='cuda:0', requires_grad=True)

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [5]:
class Model(nn.Module):
    """Two stacked 5x5 convolutions, each followed by a ReLU."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=20, kernel_size=5)

    def forward(self, x):
        """Apply conv1 -> ReLU -> conv2 -> ReLU to ``x`` and return the result."""
        hidden = F.relu(self.conv1(x))
        out = F.relu(self.conv2(hidden))
        return out

In [7]:
@torch.no_grad()  # the in-place fill below must not be tracked by autograd
def init_weights(m):
    """Print ``m``; if it is exactly an ``nn.Linear``, set its weight to all ones.

    Written to be passed to ``nn.Module.apply``. Only the weight is touched
    (the bias is left as is), and the updated weight is printed afterwards.
    Modules of any other type are printed and otherwise ignored.
    """
    print(m)
    if type(m) is not nn.Linear:
        return
    m.weight.fill_(1.0)
    print(m.weight)

# apply() calls init_weights on every submodule and then on the container itself.
layers = [nn.Linear(2, 2), nn.Linear(2, 2)]
net = nn.Sequential(*layers)
net.apply(init_weights)

Linear(in_features=2, out_features=2, bias=True)
Parameter containing:
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
Linear(in_features=2, out_features=2, bias=True)
Parameter containing:
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
Sequential(
  (0): Linear(in_features=2, out_features=2, bias=True)
  (1): Linear(in_features=2, out_features=2, bias=True)
)


Sequential(
  (0): Linear(in_features=2, out_features=2, bias=True)
  (1): Linear(in_features=2, out_features=2, bias=True)
)