In [2]:
import torch
import torch.nn as nn

##### Detect the available compute device #####
# Preference order: CUDA GPU, then Apple-Silicon MPS, then CPU.
if torch.cuda.is_available():
    CHIP = "cuda"   # Nvidia - Compute Unified Device Architecture
elif torch.backends.mps.is_available():
    # is_available() rather than is_built(): is_built() only says this PyTorch
    # binary was compiled with MPS support, not that an MPS device can be used
    # on the current machine.
    CHIP = "mps"    # Apple Silicon - API Metal - Metal Performance Shaders
else:
    CHIP = "cpu"
# Notebook-wide device handle read by the later demo cells.
device = torch.device(CHIP)
print("运算用设备：",device)

运算用设备： mps


# 1 torch 张量(tensor)

Each torch.Tensor has a torch.dtype, torch.device, and torch.layout.

requires_grad:

- 在用户手动定义tensor时，参数requires_grad默认值是False。

- 而在Module中的层在定义时，相关tensor的requires_grad参数默认是True。

- 在计算图中，如果有一个输入的requires_grad是True，那么输出的requires_grad也是True。只有在所有输入的requires_grad都为False时，输出的requires_grad才为False。

backward():

- 不带参数调用时只能对标量求导数（梯度）；对非标量tensor调用时，需要传入与其形状相同的gradient参数，例如 b.backward(torch.ones_like(b))


detach():

- 返回一个新的tensor，并且这个tensor是从当前的计算图中分离出来的（截断计算图）。但是返回的tensor和原来的tensor是共享内存空间的。

In [None]:
# Two float leaf tensors on `device`, both tracked by autograd.
leaf_options = dict(dtype=torch.float, device=device, requires_grad=True)
a = torch.tensor([2, 4, 6], **leaf_options)
c = torch.tensor([1, 2, 3], **leaf_options)

# Inspect a's gradient slot and its size before any backward pass has run.
print(a.grad)
print(a.size())

# b = 3*a^2 + 2*c^3 element-wise, written with plain multiplications.
quadratic_term = 3 * a * a
cubic_term = 2 * c * c * c
b = quadratic_term + cubic_term
# backward() is called on the scalar sum of b.
b1 = b.sum()

print(b)
print(b1)
b1.backward()

# Gradients accumulated on each leaf by the backward pass.
for leaf in (a, c):
    print('梯度：', leaf.grad)

# is_leaf flag of each user-created tensor.
for leaf in (a, c):
    print(leaf.is_leaf)

### 张量操作

变形：

❌  view() （并未被弃用，但要求张量在内存中连续(contiguous)，否则会报错，因此不推荐）

✅  reshape()

- reshape方法更强大：当张量内存连续时，a.reshape()等价于a.view()（返回共享内存的视图）；否则等价于a.contiguous().view()（会先复制数据）。

- https://blog.csdn.net/Flag_ing/article/details/109129752

# 2 基础网络 torch.nn

## 2.1 线性层

``` python
CLASS torch.nn.Linear(in_features, out_features, bias=True, device=None, dtype=None)
```

In [None]:
# Bias-free linear layer mapping 2 input features to 3 output features,
# with its weight created directly on `device`.
A_Linear_Layer = nn.Linear(in_features=2, out_features=3, bias=False, device=device)

# Sample a batch of 5 two-feature rows (no device argument), then move it to `device`.
a = torch.randn(5, 2)
a = a.to(device)
b = A_Linear_Layer(a)

# Show, in order: the input batch, the layer's named parameters, the layer output.
for shown in (a, list(A_Linear_Layer.named_parameters()), b):
    print(shown)

## 2.2 其他基础层

池化层

``` python
CLASS torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)
```

(N,C,H,W) 批量大小，通道数，图片高度，图片宽度

kernel_size ：表示做最大池化的窗口大小，可以是单个值，也可以是tuple元组

stride ：步长，可以是单个值，也可以是tuple元组

padding ：填充，可以是单个值，也可以是tuple元组

dilation ：控制窗口中元素步幅

return_indices ：布尔类型，返回最大值位置索引

ceil_mode ：布尔类型，为True，用向上取整的方法，计算输出形状；默认是向下取整。

In [None]:
# One 3-channel 8x8 sample, tracked by autograd.
a = torch.randn((1, 3, 8, 8), requires_grad=True)

# 4x4 max pooling; with return_indices=True the layer returns a pair
# (pooled values, indices of the maxima).
Pooling_Layer = nn.MaxPool2d((4, 4), return_indices=True)
b = Pooling_Layer(a)
pooled_values, max_positions = b

# Back-propagate an all-ones upstream gradient through the pooled values
# (same as calling backward with torch.ones(1, 3, 2, 2)).
pooled_values.backward(torch.ones_like(pooled_values))

print(a)
print(pooled_values)
# Gradient of the first row of the first channel of the first sample.
print(a.grad[0][0][0])


归一化层

``` python
CLASS torch.nn.BatchNorm2d(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True, device=None, dtype=None)
```

num_features：一般输入参数为batch_size×num_features×height×width，即为其中特征的数量

eps：分母中添加的一个值，目的是为了计算的稳定性，默认为：1e-5

momentum：用于更新运行时均值和方差（running_mean / running_var）的系数，更新公式为 running = (1 - momentum) × running + momentum × 当前批次统计量。注意：它与优化器（如SGD）中的momentum含义不同。

affine：当设为true时，会给定可以学习的系数矩阵gamma和beta


## 2.3 卷积层

``` python
CLASS torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)
```

groups:分组，输入、输出通道均分组，均需被其整除

![卷积层groups](./pic/卷积层groups.jpg)



In [None]:
# One 3-channel 8x8 sample created on `device`, tracked by autograd.
a = torch.randn((1, 3, 8, 8), requires_grad=True, device=device)

# 3 input channels -> 2 output channels with a 4x4 kernel (default stride/padding).
Conv_Layer = nn.Conv2d(in_channels=3, out_channels=2, kernel_size=(4, 4), device=device)
b = Conv_Layer(a)

# b is not a scalar, so an explicit all-ones upstream gradient is supplied.
b.backward(gradient=torch.ones_like(b))

# Show, in order: the input, the convolution output, the gradient w.r.t. the input.
for shown in (a, b, a.grad):
    print(shown)

## 2.4 循环层

nn.LSTM

初始化：

``` python
CLASS torch.nn.LSTM(*args, **kwargs)
```
![LSTM参数](pic/LSTM参数.png)

- input_size – The number of expected features in the input x
- hidden_size – The number of features in the hidden state h
- num_layers – Number of recurrent layers. E.g., setting num_layers=2 would mean stacking two LSTMs together to form a stacked LSTM, with the second LSTM taking in outputs of the first LSTM and computing the final results. Default: 1
- bias – If False, then the layer does not use bias weights b_ih and b_hh. Default: True
- batch_first – If True, then the input and output tensors are provided as (batch, seq, feature) instead of (seq, batch, feature). Note that this does not apply to hidden or cell states. See the Inputs/Outputs sections below for details. Default: False
- dropout – If non-zero, introduces a Dropout layer on the outputs of each LSTM layer except the last layer, with dropout probability equal to dropout. Default: 0
- bidirectional – If True, becomes a bidirectional LSTM. Default: False
- proj_size – If > 0, will use LSTM with projections of corresponding size. Default: 0

输入：input, (h_0, c_0)
- input: (L,H_in)  (L,N,H_in)or(N,L,H_in) depending on batch_first
- h_0:   (D*num_layers,H_out)   (D*num_layers,N,H_out)
- c_0:   (D*num_layers,H_cell)   (D*num_layers,N,H_cell)

N=batch size       L=sequence length         D=2 if 双向 else 1

输出：output, (h_n, c_n)
- output: (L,D*H_out)         (L,N,D*H_out)or(N,L,D*H_out)
- h_n:   (D*num_layers,H_out)    (D*num_layers,N,H_out)
- c_n:   (D*num_layers,H_cell)   (D*num_layers,N,H_cell)

- H_in=input size
- H_cell=hidden size
- H_out=hidden size  OR  proj_size if it>0

In [None]:
#from torch.utils.tensorboard import SummaryWriter  # try out tensorboard
#writer = SummaryWriter('./log')

# Sizes used by this demo, named after the shape symbols in the notes above.
BATCH = 3        # N: batch size
SEQ_LEN = 4      # L: sequence length
INPUT_SIZE = 5   # H_in: features per time step
HIDDEN_SIZE = 7  # hidden size (H_out and H_cell, since proj_size is left at its default)
NUM_LAYERS = 2   # stacked LSTM layers (unidirectional, so D = 1)

rnn = nn.LSTM(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS).to(device)

# Named `lstm_input` rather than `input` so the builtin input() is not shadowed
# for the rest of the kernel session.
lstm_input = torch.randn((SEQ_LEN, BATCH, INPUT_SIZE), device=device)
# Initial hidden and cell states: (D * num_layers, N, hidden size).
h0 = torch.randn((NUM_LAYERS, BATCH, HIDDEN_SIZE), device=device)
c0 = torch.randn((NUM_LAYERS, BATCH, HIDDEN_SIZE), device=device)
output, (hn, cn) = rnn(lstm_input, (h0, c0))
print(lstm_input)
print(output)

# output is not a scalar, so an explicit all-ones upstream gradient is supplied.
output.backward(torch.ones_like(output))
print(type(rnn.named_parameters()))
print(list(rnn.named_parameters()))

#writer.add_scalar("output", output[0][0][0], 1)

#torch.onnx.export(rnn,lstm_input,'rnn.onnx',export_params=True,opset_version=8,) # set BATCH to 1

## 2.5 Transformer层

In [23]:
# This demo runs on the CPU. A cell-local name is used instead of reassigning
# the notebook-wide `device`, so re-running earlier cells after this one does
# not silently move them to the CPU (and `device` stays a torch.device, not a str).
# NOTE(review): presumably forced to CPU because of backend / ONNX-export
# limitations — confirm.
transformer_device = torch.device('cpu')

transformer_model = nn.Transformer(d_model=5, nhead=5, num_encoder_layers=12).to(transformer_device)
src = torch.rand((2, 4, 5), device=transformer_device)  # source sequence length, batch size, feature number
tgt = torch.rand((3, 4, 5), device=transformer_device)  # target sequence length, batch size, feature number
out = transformer_model(src, tgt)

print("输入x：")
print(src)
print("输入y（标注）：")
print(tgt)
print("输出y（预测）：")
print(out)

# out is not a scalar, so an explicit all-ones upstream gradient is supplied.
out.backward(torch.ones_like(out))
print(list(transformer_model.named_parameters()))

输入x：
tensor([[[1.2518e-01, 7.6708e-01, 9.9076e-01, 9.6356e-01, 6.7897e-02],
         [3.9633e-01, 8.0347e-01, 8.4013e-02, 9.0199e-01, 3.1302e-01],
         [1.1181e-01, 7.9944e-01, 6.7452e-01, 9.3328e-01, 6.3262e-02],
         [2.0702e-01, 6.8050e-01, 3.9385e-01, 8.1455e-01, 3.5479e-01]],

        [[4.4721e-01, 9.4178e-01, 7.0384e-01, 1.5469e-01, 5.3296e-02],
         [3.1771e-01, 2.6059e-01, 1.1724e-01, 9.5483e-01, 7.5291e-01],
         [9.8392e-01, 9.0223e-02, 5.2201e-01, 7.5489e-01, 4.1476e-01],
         [5.9577e-01, 3.5304e-02, 4.0942e-01, 6.5249e-01, 2.2043e-01]],

        [[6.0339e-01, 2.2344e-01, 8.3389e-01, 8.6196e-02, 5.6590e-01],
         [1.7142e-01, 2.7986e-01, 9.1285e-01, 4.6759e-01, 1.2341e-01],
         [7.5597e-01, 9.1207e-01, 8.7149e-02, 6.0758e-01, 6.6533e-01],
         [5.3542e-01, 9.4108e-01, 7.2428e-01, 7.0315e-01, 3.0422e-01]],

        [[8.5563e-01, 4.1562e-01, 9.7038e-01, 6.9138e-01, 5.4039e-01],
         [3.8641e-01, 5.2206e-01, 2.0135e-01, 5.4041e-01, 2.0157e-

In [16]:
# Export the Transformer demo model to 'transformer.onnx', using (src, tgt)
# as the example inputs and storing the trained parameters in the file.
# Original note: set BATCH to 1.
# NOTE(review): this cell defines no BATCH variable; the note looks carried
# over from the LSTM export line — confirm.
torch.onnx.export(
    transformer_model,
    (src, tgt),
    'transformer.onnx',
    export_params=True,
    opset_version=8,
)

  _C._jit_pass_onnx_graph_shape_type_inference(
  _C._jit_pass_onnx_graph_shape_type_inference(
