In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

def demonstrate_offset_grad_shapes(
    *,
    batch_size: int = 2,
    in_channels: int = 64,
    out_channels: int = 128,
    height: int = 32,
    width: int = 32,
    kernel_size: int = 3,
    stride: int = 1,
    padding: int = 1,
) -> torch.Tensor:
    """Print the tensor shapes that appear when back-propagating to deformable-conv offsets.

    This is a shape walkthrough, not a real deformable convolution: the
    bilinear-sampling step and its coordinate derivatives are replaced by
    random placeholder tensors, so only the *shapes* of the results are
    meaningful.

    All arguments are keyword-only; calling the function without arguments
    reproduces the original fixed configuration.

    Args:
        batch_size: Number of samples N.
        in_channels: Input channel count C_in.
        out_channels: Output channel count C_out.
        height: Input feature-map height.
        width: Input feature-map width.
        kernel_size: Side length K of the square kernel (K*K sampling points).
        stride: Convolution stride.
        padding: Zero padding applied on each side.

    Returns:
        A tensor of shape ``[N, 2*K*K, H_out, W_out]`` laid out like the offset
        field. Channels are point-major: the two direction components of each
        sampling point are adjacent.

    Raises:
        ValueError: If ``stride`` or ``kernel_size`` is smaller than 1, or if
            the resulting output size is not positive.
    """
    if stride < 1:
        raise ValueError(f"stride must be >= 1, got {stride}")
    if kernel_size < 1:
        raise ValueError(f"kernel_size must be >= 1, got {kernel_size}")

    # Standard convolution output-size formula.
    H_out = (height + 2 * padding - kernel_size) // stride + 1
    W_out = (width + 2 * padding - kernel_size) // stride + 1
    if H_out < 1 or W_out < 1:
        raise ValueError(
            f"output size must be positive, got {H_out}x{W_out}; "
            "check height/width/kernel_size/stride/padding"
        )
    num_points = kernel_size * kernel_size

    print("=== 可变形卷积偏移量梯度形状演示 ===\n")

    print(f"输入特征图: [{batch_size}, {in_channels}, {height}, {width}]")
    print(f"输出特征图: [{batch_size}, {out_channels}, {H_out}, {W_out}]")
    print(f"卷积核大小: {kernel_size}×{kernel_size}, 采样点数量: {num_points}")
    print()

    # Stand-ins for the components of a deformable convolution.
    # NOTE: the order of the torch.randn calls is kept fixed so that seeded
    # runs draw the same values for the same tensors.

    # 1. Input feature map.
    x = torch.randn(batch_size, in_channels, height, width, requires_grad=True)

    # 2. Offset field (in a real layer it is produced by an extra conv): two
    #    components per sampling point.
    offset_field = torch.randn(
        batch_size, 2 * num_points, H_out, W_out, requires_grad=True
    )

    # 3. Modulation field (Deformable Conv v2), squashed into (0, 1).
    modulation_field = torch.sigmoid(
        torch.randn(batch_size, num_points, H_out, W_out, requires_grad=True)
    )

    # 4. Main convolution weight.
    weight = torch.randn(
        out_channels, in_channels, kernel_size, kernel_size, requires_grad=True
    )

    # 5. Simulated upstream gradient dL/dy.
    grad_output = torch.randn(batch_size, out_channels, H_out, W_out)

    print("=== 前向传播过程中的形状变化 ===")

    # The bilinear sampling itself is not performed; only the shape the
    # sampled features would have is reported.
    sampled_features_shape = (batch_size, in_channels, num_points, H_out, W_out)
    print(f"采样后特征形状: {sampled_features_shape}")

    # Modulation broadcasts over the channel axis, so the shape is unchanged.
    modulation_expanded = modulation_field.unsqueeze(1)  # [N, 1, K², H_out, W_out]
    modulated_features_shape = (batch_size, in_channels, num_points, H_out, W_out)
    print(f"调制后特征形状: {modulated_features_shape}")

    # Convolution output (forward pass is not actually computed here).
    output_shape = (batch_size, out_channels, H_out, W_out)
    print(f"卷积输出形状: {output_shape}")

    print("\n=== 反向传播过程中的形状变化 ===")

    # Step 1: gradient w.r.t. the modulated features,
    # i.e. columns = W^T x grad_output.
    weight_flat = weight.flatten(1)  # [C_out, C_in*K²]
    weight_T = weight_flat.transpose(0, 1)  # [C_in*K², C_out]

    grad_output_flat = grad_output.view(
        batch_size, out_channels, H_out * W_out
    )  # [N, C_out, H_out*W_out]

    # bmm needs an explicit batch axis on the (shared) weight matrix.
    grad_modulated_features = torch.bmm(
        weight_T.unsqueeze(0).expand(batch_size, -1, -1),  # [N, C_in*K², C_out]
        grad_output_flat,  # [N, C_out, H_out*W_out]
    )  # -> [N, C_in*K², H_out*W_out]

    # Rows of weight_flat are ordered (C_in, K²), so this split is consistent.
    grad_modulated_features = grad_modulated_features.view(
        batch_size, in_channels, num_points, H_out, W_out
    )

    print(f"对调制特征的梯度形状: {grad_modulated_features.shape}")

    # Step 2: gradient w.r.t. the raw sampled features.
    # Since x_modulated = x_sampled ⊙ m, the chain rule gives
    #   dL/dx_sampled = dL/dx_modulated ⊙ m   (multiply, not divide).
    # Simplification: the gradient w.r.t. the modulation factor itself is
    # not computed here.
    grad_sampled_features = grad_modulated_features * modulation_expanded

    print(f"对采样特征的梯度形状: {grad_sampled_features.shape}")

    # Step 3: geometric sensitivity — the core of the offset gradient.
    # A real implementation would use the derivative of the bilinear kernel
    # w.r.t. the sampling coordinates; here it is a random placeholder with
    # one value per direction (last axis of size 2) per sampling point.
    geometric_sensitivity_shape = (
        batch_size, in_channels, num_points, H_out, W_out, 2
    )
    geometric_sensitivity = torch.randn(geometric_sensitivity_shape)
    print(f"几何敏感度形状: {geometric_sensitivity_shape}")

    # Step 4: offset gradient
    #   dL/dΔp = sum over channels( dL/dx_sampled * geometric sensitivity ).
    grad_sampled_expanded = grad_sampled_features.unsqueeze(-1)  # [N, C, K², H_out, W_out, 1]

    offset_grad = torch.sum(
        grad_sampled_expanded * geometric_sensitivity, dim=1
    )  # [N, K², H_out, W_out, 2]

    # Move the direction axis next to the point axis, then merge the two so
    # the result has the offset-field layout [N, 2*K², H_out, W_out].
    offset_grad = offset_grad.permute(0, 1, 4, 2, 3).contiguous()  # [N, K², 2, H_out, W_out]
    offset_grad = offset_grad.view(batch_size, 2 * num_points, H_out, W_out)

    print(f"最终偏移量梯度形状: {offset_grad.shape}")

    print("\n=== 形状变化总结 ===")
    shapes = {
        "输入特征": x.shape,
        "偏移量场": offset_field.shape,
        "调制因子场": modulation_field.shape,
        "上层梯度": grad_output.shape,
        "对调制特征梯度": grad_modulated_features.shape,
        "几何敏感度": geometric_sensitivity_shape,
        "偏移量梯度": offset_grad.shape,
    }

    for name, shape in shapes.items():
        print(f"{name:>20}: {shape}")

    return offset_grad

def demonstrate_matrix_operations(
    *,
    batch_size: int = 2,
    out_channels: int = 32,
    in_channels: int = 16,
    kernel_size: int = 3,
    out_height: int = 8,
    out_width: int = 8,
) -> None:
    """Print, step by step, the shapes produced by ``columns = W^T x grad_output``.

    Random tensors are used for the weight and the upstream gradient; only the
    printed shapes are meaningful. All arguments are keyword-only, and calling
    the function without arguments reproduces the original fixed configuration.

    Args:
        batch_size: Number of samples N.
        out_channels: Output channel count C_out.
        in_channels: Input channel count C_in.
        kernel_size: Side length K of the square kernel.
        out_height: Height of the output feature map.
        out_width: Width of the output feature map.

    Returns:
        None. The results are reported on stdout only.
    """
    print("\n" + "="*50)
    print("矩阵操作详细演示")
    print("="*50)

    print(
        f"模拟参数: N={batch_size}, C_out={out_channels}, "
        f"C_in={in_channels}, K={kernel_size}"
    )
    print(f"输出空间尺寸: {out_height}×{out_width}")
    print()

    # Weight tensor of a regular convolution.
    weight = torch.randn(out_channels, in_channels, kernel_size, kernel_size)
    print(f"原始权重形状: {weight.shape}")

    # flatten(1): merge (C_in, K, K) into a single axis -> [C_out, C_in*K*K].
    weight_flat = weight.flatten(1)
    print(f"flatten(1)后形状: {weight_flat.shape}")

    # transpose(0, 1) -> [C_in*K*K, C_out].
    weight_T = weight_flat.transpose(0, 1)
    print(f"转置后形状: {weight_T.shape}")

    # Upstream gradient with its spatial axes merged -> [N, C_out, H_out*W_out].
    grad_output = torch.randn(batch_size, out_channels, out_height, out_width)
    grad_output_flat = grad_output.view(
        batch_size, out_channels, out_height * out_width
    )
    print(f"上层梯度展平后形状: {grad_output_flat.shape}")

    # Matrix product columns = weight_T x grad_output_flat.
    # bmm requires a batch axis on both operands, so the shared weight matrix
    # is expanded (without copying) along a new leading axis.
    weight_T_expanded = weight_T.unsqueeze(0).expand(batch_size, -1, -1)
    print(f"扩展后权重形状: {weight_T_expanded.shape}")

    columns = torch.bmm(weight_T_expanded, grad_output_flat)
    print(f"矩阵乘法结果形状: {columns.shape}")

    # Split the merged axes back out -> [N, C_in, K, K, H_out, W_out].
    columns_reshaped = columns.view(
        batch_size, in_channels, kernel_size, kernel_size, out_height, out_width
    )
    print(f"最终columns形状: {columns_reshaped.shape}")

if __name__ == "__main__":
    # Script entry point: run the offset-gradient shape walkthrough first and
    # keep its returned tensor in a module-level name, then run the
    # matrix-operation walkthrough (which only prints).
    offset_grad = demonstrate_offset_grad_shapes()
    demonstrate_matrix_operations()

=== 可变形卷积偏移量梯度形状演示 ===

输入特征图: [2, 64, 32, 32]
输出特征图: [2, 128, 32, 32]
卷积核大小: 3×3, 采样点数量: 9

=== 前向传播过程中的形状变化 ===
采样后特征形状: (2, 64, 9, 32, 32)
调制后特征形状: (2, 64, 9, 32, 32)
卷积输出形状: (2, 128, 32, 32)

=== 反向传播过程中的形状变化 ===
对调制特征的梯度形状: torch.Size([2, 64, 9, 32, 32])
对采样特征的梯度形状: torch.Size([2, 64, 9, 32, 32])
几何敏感度形状: (2, 64, 9, 32, 32, 2)
最终偏移量梯度形状: torch.Size([2, 18, 32, 32])

=== 形状变化总结 ===
                输入特征: torch.Size([2, 64, 32, 32])
                偏移量场: torch.Size([2, 18, 32, 32])
               调制因子场: torch.Size([2, 9, 32, 32])
                上层梯度: torch.Size([2, 128, 32, 32])
             对调制特征梯度: torch.Size([2, 64, 9, 32, 32])
               几何敏感度: (2, 64, 9, 32, 32, 2)
               偏移量梯度: torch.Size([2, 18, 32, 32])

矩阵操作详细演示
模拟参数: N=2, C_out=32, C_in=16, K=3
输出空间尺寸: 8×8

原始权重形状: torch.Size([32, 16, 3, 3])
flatten(1)后形状: torch.Size([32, 144])
转置后形状: torch.Size([144, 32])
上层梯度展平后形状: torch.Size([2, 32, 64])
扩展后权重形状: torch.Size([2, 144, 32])
矩阵乘法结果形状: torch.Size([2, 144, 64])
最终