In [2]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchviz import make_dot
# Select the compute device: use CUDA when torch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("current device =", device)

# Define a neural-network class
class MyNetwork(nn.Module):
    """Fully connected classifier: flatten -> Linear/ReLU -> Linear/ReLU -> Linear.

    With the default arguments the layer sizes are 784 -> 512 -> 512 -> 10,
    i.e. exactly the architecture that was previously hard-coded.

    Args:
        in_features: Number of values per sample after flattening
            (default ``28 * 28``).
        hidden_features: Width of both hidden linear layers (default ``512``).
        num_classes: Number of output logits per sample (default ``10``).
    """

    def __init__(
        self,
        in_features: int = 28 * 28,
        hidden_features: int = 512,
        num_classes: int = 10,
    ) -> None:
        # "-> None" is only a type hint; it has no runtime effect and simply
        # states that this method returns None.
        super().__init__()
        # nn.Flatten() keeps dim 0 and collapses every remaining dim into one.
        self.flatten = nn.Flatten()
        # Layers are created in the same order as before, so attribute names,
        # state-dict keys and default initialisation order are unchanged.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Flatten ``x`` and pass it through the linear/ReLU stack.

        Args:
            x: Input tensor; every dim after the first is flattened, and the
                flattened size must equal ``in_features``.

        Returns:
            The output of the last linear layer (no softmax is applied here).
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
    
# Instantiate the network, then move its parameters to the selected device.
# nn.Module.to() returns the module itself, so rebinding `model` is safe.
model = MyNetwork()
model = model.to(device)
print(model)

current device = cuda
MyNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [6]:

# Run a single forward pass on one random 1x28x28 input and turn the logits
# into class probabilities. This cell only does inference: no loss is computed
# and no weights are updated.
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
softmax_layer = nn.Softmax(dim=1)  # normalise across dim 1 of the logits
# NOTE(review): the spelling `pred_probabability` is kept because it is a
# notebook-global name that other cells may reference.
pred_probabability = softmax_layer(logits)
print(pred_probabability)
y_pred = pred_probabability.argmax(dim=1)  # index of the largest probability
print(f"Predicted class: {y_pred}")

tensor([[0.0990, 0.0969, 0.1034, 0.1002, 0.1004, 0.1011, 0.0928, 0.1166, 0.0964,
         0.0931]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
Predicted class: tensor([7], device='cuda:0')


给出ReLU和softmax的表达式
+ ReLU
$$
\operatorname{ReLU}(x)=(x)^{+}=\max (0, x)
$$
![ReLU](./img/ReLU.png)
+ Softmax
$$
\operatorname{Softmax}\left(x_i\right)=\frac{\exp \left(x_i\right)}{\sum_j \exp \left(x_j\right)}
$$

输出模型的结构

In [8]:
# Show the module tree first, then one line per learnable parameter.
print(f"Model structure: {model}\n\n")

# named_parameters() yields (qualified name, tensor) pairs; only the first two
# entries along dim 0 of each tensor are printed to keep the output short.
for layer_name, tensor in model.named_parameters():
    print(f"Layer: {layer_name} | Size: {tensor.size()} | Values : {tensor[:2]} \n")

Model structure: MyNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[-0.0022,  0.0310,  0.0166,  ...,  0.0066,  0.0298, -0.0330],
        [ 0.0111,  0.0037,  0.0351,  ...,  0.0002, -0.0257,  0.0057]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([-0.0129,  0.0180], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[ 0.0138,  0.0329,  0.0402,  ...,  0.0320,  0.0119, -0.0442],
        [ 0.0202,  0.0232,  0.0311,  ...,  0.0360,  0.0041, -0.0410]],
       device='cuda:0', grad_fn=<SliceB