# 16、PyTorch中进行卷积残差模块算子融合

In [42]:
import torch
import torch.nn as nn
from torch.nn import Conv2d

In [43]:
# in_channels: int, 输入特征维度
# out_channels: int, 输出特征维度
# kernel_size: _size_2_t, 卷积核大小
# stride: _size_2_t = 1, 步长，滑动跳几步
# padding: Union[str, _size_2_t] = 0,分为 valid/same
# dilation: _size_2_t = 1, 空洞卷积，原本是实心的，变相的变大感受范围
# groups: int = 1, 分为多少组，必须被 in/out channels整除。 深度可分离卷积用
# bias: bool = True,
# padding_mode: str = "zeros",  # TODO: refine this type

conv_laye1 = Conv2d(2, 2, 3, padding="same")
for i in conv_laye1.named_parameters():
    print(i[0])
print('------')
print(conv_laye1.weight)
print(conv_laye1.weight.shape)
# torch.Size([2, 2, 3, 3]) 分别是 out_channels, in_channels, kernel_width, kernel_height

weight
bias
------
Parameter containing:
tensor([[[[-0.1311,  0.2241,  0.0320],
          [ 0.0384,  0.1362,  0.1443],
          [-0.1571,  0.0456,  0.2345]],

         [[ 0.0272, -0.1808,  0.2062],
          [-0.2220, -0.1941, -0.1617],
          [ 0.1410,  0.1927, -0.2115]]],


        [[[ 0.0928,  0.1705, -0.1105],
          [ 0.0279, -0.0829,  0.1401],
          [ 0.0755,  0.1934, -0.1110]],

         [[-0.1686, -0.1503,  0.1545],
          [ 0.0384,  0.2028,  0.0521],
          [-0.0017,  0.1650,  0.1246]]]], requires_grad=True)
torch.Size([2, 2, 3, 3])


In [44]:
# group 用法
conv_laye1 =  Conv2d(2, 4, 3, padding="same", groups=2)
# 其实就是不同的卷积输出然后拼接起来
conv_laye1.weight.shape, conv_laye1.bias.shape # (torch.Size([4, 1, 3, 3]), torch.Size([4]))

(torch.Size([4, 1, 3, 3]), torch.Size([4]))

In [45]:
# group 用法
conv_laye1 =  Conv2d(1, 2, 3, padding="same")
# 其实就是不同的卷积输出然后拼接起来
conv_laye1.weight.shape, conv_laye1.bias.shape

(torch.Size([2, 1, 3, 3]), torch.Size([2]))

## 深度可分离卷积

`res_block =  3*3 conv + 1*1 conv + input`

![image-20250619202922931](http://assets.hypervoid.top/img/2025/06/19/image-20250619202922931-79dd.png)

In [46]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
in_channels = 2
out_channels = 2
kernel_size = 3 
batch_size = 1
w, h = 28, 28 # 图片宽高

### 原生写法

In [47]:
x = torch.ones((batch_size, in_channels, w, h))
conv3 = nn.Conv2d(in_channels, out_channels, kernel_size, padding="same")
conv1 = nn.Conv2d(in_channels, out_channels, 1)
result1 = conv3(x) + conv1(x) + x
result1.shape, result1

(torch.Size([1, 2, 28, 28]),
 tensor([[[[0.4720, 1.1708, 1.1708,  ..., 1.1708, 1.1708, 1.3344],
           [0.8485, 1.5441, 1.5441,  ..., 1.5441, 1.5441, 1.5185],
           [0.8485, 1.5441, 1.5441,  ..., 1.5441, 1.5441, 1.5185],
           ...,
           [0.8485, 1.5441, 1.5441,  ..., 1.5441, 1.5441, 1.5185],
           [0.8485, 1.5441, 1.5441,  ..., 1.5441, 1.5441, 1.5185],
           [0.7329, 1.0327, 1.0327,  ..., 1.0327, 1.0327, 0.7811]],
 
          [[1.1891, 1.3983, 1.3983,  ..., 1.3983, 1.3983, 1.0062],
           [0.9465, 1.5522, 1.5522,  ..., 1.5522, 1.5522, 1.2437],
           [0.9465, 1.5522, 1.5522,  ..., 1.5522, 1.5522, 1.2437],
           ...,
           [0.9465, 1.5522, 1.5522,  ..., 1.5522, 1.5522, 1.2437],
           [0.9465, 1.5522, 1.5522,  ..., 1.5522, 1.5522, 1.2437],
           [0.4166, 0.8005, 0.8005,  ..., 0.8005, 0.8005, 0.8232]]]],
        grad_fn=<AddBackward0>))

### 算子融合加速写法

In [48]:
# 把 conv1 和 x 自身写作 3*3 卷积形式
# 最后简化为一个 conv 层

# 8个元素，对应4个维度（每个维度有 left/top/front 和 right/bottom/back 两个填充量）
# 维度分别是 width、height、in_channels、out_channels
conv1_w = F.pad(conv1.weight, [1, 1, 1, 1, 0, 0, 0, 0])

conv_x_w = torch.zeros_like(conv1_w)
conv_x_w[0, 0, 1, 1] = 1  # 设置通道为x自身
conv_x_w[1, 1, 1, 1] = 1  # 设置通道为x自身


final_conv = nn.Conv2d(in_channels, out_channels, kernel_size, padding="same")
final_conv.weight = nn.Parameter(conv3.weight + conv1_w + conv_x_w)
final_conv.bias = nn.Parameter(conv3.bias + conv1.bias + 0)

result2 = final_conv(x)
result2

tensor([[[[0.4720, 1.1708, 1.1708,  ..., 1.1708, 1.1708, 1.3344],
          [0.8485, 1.5441, 1.5441,  ..., 1.5441, 1.5441, 1.5185],
          [0.8485, 1.5441, 1.5441,  ..., 1.5441, 1.5441, 1.5185],
          ...,
          [0.8485, 1.5441, 1.5441,  ..., 1.5441, 1.5441, 1.5185],
          [0.8485, 1.5441, 1.5441,  ..., 1.5441, 1.5441, 1.5185],
          [0.7329, 1.0327, 1.0327,  ..., 1.0327, 1.0327, 0.7811]],

         [[1.1891, 1.3983, 1.3983,  ..., 1.3983, 1.3983, 1.0062],
          [0.9465, 1.5522, 1.5522,  ..., 1.5522, 1.5522, 1.2437],
          [0.9465, 1.5522, 1.5522,  ..., 1.5522, 1.5522, 1.2437],
          ...,
          [0.9465, 1.5522, 1.5522,  ..., 1.5522, 1.5522, 1.2437],
          [0.9465, 1.5522, 1.5522,  ..., 1.5522, 1.5522, 1.2437],
          [0.4166, 0.8005, 0.8005,  ..., 0.8005, 0.8005, 0.8232]]]],
       grad_fn=<ConvolutionBackward0>)

In [49]:
torch.all(torch.isclose(result1, result2))

tensor(True)

### 比较运行效率


In [51]:
import time

x = torch.ones((batch_size*100, in_channels, w, h))

t1 = time.time()
x = torch.ones((batch_size, in_channels, w, h))
conv3 = nn.Conv2d(in_channels, out_channels, kernel_size, padding="same")
conv1 = nn.Conv2d(in_channels, out_channels, 1)
result1 = conv3(x) + conv1(x) + x
t2 = time.time()
print(t2-t1)


conv1_w = F.pad(conv1.weight, [1, 1, 1, 1, 0, 0, 0, 0])

conv_x_w = torch.zeros_like(conv1_w)
conv_x_w[0, 0, 1, 1] = 1  # 设置通道为x自身
conv_x_w[1, 1, 1, 1] = 1  # 设置通道为x自身


final_conv = nn.Conv2d(in_channels, out_channels, kernel_size, padding="same")
final_conv.weight = nn.Parameter(conv3.weight + conv1_w + conv_x_w)
final_conv.bias = nn.Parameter(conv3.bias + conv1.bias + 0)
t3 = time.time()
result2 = final_conv(x)
t4 = time.time()
print(t4-t3)

print((t2-t1)/ (t4-t3))

0.0009324550628662109
8.726119995117188e-05
10.685792349726777
