In [None]:
import torch 
import torch.nn  as nn 
from torchvision import models 
 
# Load an ImageNet-pretrained ResNet-18 and keep a handle on its first convolution.
# The `weights=` enum replaces the `pretrained=True` flag, which torchvision
# deprecated in 0.13; IMAGENET1K_V1 is the checkpoint `pretrained=True` used to load.
# Note: nothing is removed here -- the original layer is only saved for later use.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
original_conv1 = model.conv1  # original first layer, kept for reference/reuse

In [None]:
# Replacement stem: 3 input channels -> 64 output channels.
# Only the first convolution downsamples (stride 2); the second uses stride 1 so the
# stack halves the spatial size exactly once, matching a 7x7 / stride-2 / padding-3
# convolution (224x224 -> 112x112). Two stride-2 convolutions in a row would shrink
# the feature map by 4x (224 -> 56) and no longer be a drop-in replacement.
custom_conv = nn.Sequential(
    nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1, bias=False),
    nn.BatchNorm2d(32),
    nn.ReLU(inplace=True),
    nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1, bias=False),
)

# conv = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
# dummy_input = torch.randn(1,  3, 224, 224)
# print(conv(dummy_input).shape)  # 输出torch.Size([1, 64, 112, 112])

参数	必须保持一致的项目	检查方法
输出通道数	需与原模型第一层的输出通道数相同（后续的 bn1 依赖该通道数）	print(original_conv1.out_channels)
特征图尺寸	确保经过自定义层后的尺寸符合要求	计算 ⌊(W−K+2P)/S⌋+1（向下取整）
W：输入尺寸（宽/高）
K：卷积核大小（如3表示3×3卷积）
P：填充（padding）像素数
S：步长（stride）

In [None]:
# from transformers import ViTModel 
 
# # 加载预训练ViT 
# vit = ViTModel.from_pretrained("google/vit-base-patch16-224") 
 
# # 添加CNN特征提取头 
# class ViTWithCNN(nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.cnn_head  = nn.Sequential(
#             nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
#             nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
#         )
#         self.vit  = vit 
        
#     def forward(self, x):
#         x = self.cnn_head(x)   # [B, 64, 56, 56]
#         x = x.flatten(2).transpose(1,  2)  # 转换为序列格式 
#         return self.vit(inputs_embeds=x).last_hidden_state  
 
# model = ViTWithCNN()

In [None]:
# Method 1: replace the layer in place (suitable for models with a simple structure).
# Rebinds the `conv1` attribute of `model` to the `custom_conv` module.
model.conv1  = custom_conv 
 
# Method 2: build a new model class (recommended).
class CustomResNet(nn.Module):
    """Wrap an existing network, swapping its first child module for a custom head.

    Args:
        original_model: Module whose first child (in ``children()`` order) is
            dropped; the remaining children are chained in order as the backbone.
        custom_head: Module applied to the input before the backbone. When
            omitted, the module-level ``custom_conv`` is used.
    """

    def __init__(self, original_model, custom_head=None):
        super().__init__()
        self.custom_head = custom_conv if custom_head is None else custom_head

        layers = list(original_model.children())[1:]
        # nn.Sequential only chains child modules, so functional calls made inside
        # the original forward() are lost. torchvision's ResNet flattens the pooled
        # features between `avgpool` and `fc`; without that step the final Linear
        # layer receives a 4-D tensor and fails with a shape mismatch.
        if layers and isinstance(layers[-1], nn.Linear):
            layers.insert(len(layers) - 1, nn.Flatten(1))
        self.backbone = nn.Sequential(*layers)

    def forward(self, x):
        """Run the custom head, then the remaining layers of the original model."""
        return self.backbone(self.custom_head(x))
 
# Wrap the network in CustomResNet: its first child is dropped and `custom_conv`
# is used as the head. This rebinds `model`, so later cells see the wrapper
# (with `.custom_head` and `.backbone`) rather than the original network.
model = CustomResNet(model)

In [None]:
import torch 
import torch.nn  as nn 
import torch.nn.functional  as F 
 
class FixedDualConv(nn.Module):
    """Apply two banks of fixed (non-trainable) kernels and concatenate the results.

    Each kernel argument may be a nested list or a tensor in one of these layouts:

    * 2-D ``(out_c, in_c)``: treated as 1x1 kernels.
    * 3-D ``(out_c, H, W)``: one spatial kernel per output channel, applied to a
      single-channel input.
    * 4-D ``(out_c, in_c, H, W)``: used as-is.

    Args:
        kernel1: First kernel bank.
        kernel2: Second kernel bank; it is convolved with the same input, so it
            must expect the same number of input channels as ``kernel1``.

    Raises:
        ValueError: If a kernel is not 2-, 3- or 4-dimensional.
    """

    def __init__(self, kernel1, kernel2):
        super().__init__()
        # Buffers move with .to()/.cuda() and are saved in state_dict, but they are
        # not parameters, so an optimizer never updates them.
        self.register_buffer('kernel1', self._as_conv_weight(kernel1))
        self.register_buffer('kernel2', self._as_conv_weight(kernel2))

        # Derive channel counts from the normalised (out_c, in_c, H, W) weights.
        self.in_channels = self.kernel1.size(1)
        self.out_channels = self.kernel1.size(0) + self.kernel2.size(0)

    @staticmethod
    def _as_conv_weight(kernel):
        """Convert ``kernel`` to a float32 ``(out_c, in_c, H, W)`` weight tensor."""
        weight = torch.as_tensor(kernel, dtype=torch.float32).detach().clone()
        if weight.dim() == 2:
            # (out_c, in_c) -> (out_c, in_c, 1, 1): pointwise kernels.
            return weight.unsqueeze(-1).unsqueeze(-1)
        if weight.dim() == 3:
            # (out_c, H, W) -> (out_c, 1, H, W): single input channel.
            return weight.unsqueeze(1)
        if weight.dim() == 4:
            return weight
        raise ValueError(
            f"kernel must be 2-D, 3-D or 4-D, got {weight.dim()}-D "
            f"with shape {tuple(weight.shape)}"
        )

    def forward(self, x):
        """Convolve ``x`` with both kernel banks and concatenate along channels.

        Args:
            x: Input of shape ``(N, in_channels, H, W)``.

        Returns:
            Tensor of shape ``(N, out_channels, H, W)``; ``padding='same'`` keeps
            the spatial size. The result is computed under ``torch.no_grad()``,
            so no gradient flows back through this layer to ``x``.
        """
        with torch.no_grad():
            conv1 = F.conv2d(x, self.kernel1, padding='same')
            conv2 = F.conv2d(x, self.kernel2, padding='same')

        return torch.cat([conv1, conv2], dim=1)

# User-defined kernels (example: edge detection + blur), written as nested lists.
edge_kernel = [
    # output channel 1
    [[-1, -1, -1],
     [-1,  8, -1],
     [-1, -1, -1]],
    # output channel 2
    [[0, 0, 0],
     [0, 1, 0],
     [0, 0, 0]],
]  # nested-list shape: (2, 3, 3)

blur_kernel = [
    # output channel 1
    [[1, 2, 1],
     [2, 4, 2],
     [1, 2, 1]],
]  # nested-list shape: (1, 3, 3)

# Build the preprocessing layer and put it in evaluation mode.
# eval() only switches the module's train/eval flag; it does not disable gradients.
fixed_conv = FixedDualConv(edge_kernel, blur_kernel)
fixed_conv.eval()

# Apply it to a dummy single-channel 28x28 batch (MNIST-sized).
dummy_input = torch.randn(1, 1, 28, 28)
# Intended output shape: [1, 3, 28, 28] (2 edge channels + 1 blur channel).
# NOTE(review): confirm FixedDualConv accepts these 3-D (out_c, H, W) kernels.
processed = fixed_conv(dummy_input)


In [None]:

# Initialise every layer of the new stem consistently: He-normal (fan_out, ReLU gain)
# for all convolutions, and identity affine (weight 1, bias 0) for batch norm.
# Iterating over the modules also covers the second convolution, instead of
# initialising only `custom_conv[0]` and leaving the other at PyTorch's default.
for layer in custom_conv.modules():
    if isinstance(layer, nn.Conv2d):
        nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
    elif isinstance(layer, nn.BatchNorm2d):
        nn.init.constant_(layer.weight, 1)
        nn.init.constant_(layer.bias, 0)
# Per-group learning rates: the new head trains with a 10x larger learning rate
# than the backbone; both groups share the same momentum.
optimizer = torch.optim.SGD(
    [
        dict(params=model.custom_head.parameters(), lr=1e-3),
        dict(params=model.backbone.parameters(), lr=1e-4),
    ],
    momentum=0.9,
)

In [None]:
# Compile the model with torch.compile in 'max-autotune' mode and rebind `model`
# to the compiled wrapper. torch.compile is available from PyTorch 2.0 onwards.
model = torch.compile(model,  mode='max-autotune')