In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np

# CNN model definition
class CNN(nn.Module):
    """Small two-convolution classifier producing 10 logits per image.

    The shape comments assume a (B, 1, 28, 28) input batch, which is the
    size implied by the ``64 * 7 * 7`` input width of ``fc1``.
    """

    def __init__(self):
        super().__init__()
        # 3x3 convolutions with padding=1 keep the spatial size unchanged.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        # 2x2 max pooling with stride 2 halves height and width on each use.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(p=0.25)
        self.fc1 = nn.Linear(in_features=64 * 7 * 7, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Map an image batch ``x`` to class logits of shape (B, 10)."""
        feats = F.relu(self.conv1(x))           # (B, 32, 28, 28)
        feats = F.relu(self.conv2(feats))       # (B, 64, 28, 28)
        feats = self.pool(self.pool(feats))     # pooled twice -> (B, 64, 7, 7)
        feats = self.dropout(feats)
        flat = feats.view(feats.size(0), -1)    # (B, 3136)
        hidden = F.relu(self.fc1(flat))         # (B, 128)
        return self.fc2(hidden)                 # (B, 10)

In [None]:
# Seed PyTorch's global RNG before the model is built and before the
# shuffling DataLoader is used, so weight initialisation and batch order
# repeat across Restart & Run All. (GPU kernels may still introduce small
# non-deterministic differences.)
SEED = 42
torch.manual_seed(SEED)

# Data preprocessing: convert images to tensors, then normalise them with the
# given mean and standard deviation.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Load the MNIST train and test splits from the same root directory.
# download=True is passed for both splits so neither line relies on the other
# having fetched the files first.
DATA_ROOT = '../data'
train_dataset = datasets.MNIST(DATA_ROOT, train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(DATA_ROOT, train=False, download=True, transform=transform)

# Training batches are shuffled; test batches keep the dataset order.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1000, shuffle=False)

# Device selection: use CUDA when it is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)

# Optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [None]:
# Train the model for five epochs (numbered 1..5 in the progress output)
for epoch in range(1, 6):
    model.train()  # training mode: dropout is active
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard optimisation step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()
    print(f"Epoch {epoch} 完成")

# Evaluate on the test set
model.eval()  # inference mode: dropout is disabled
correct = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        predicted = model(inputs).argmax(dim=1)  # index of the largest logit
        correct += (predicted == labels).sum().item()

print(f"\n测试准确率：{correct / len(test_loader.dataset):.4f}")

In [None]:
# Show the model's predictions for the first 10 test images.

# Make sure dropout is disabled for inference regardless of which cell ran
# last (e.g. an interrupted training run leaves the model in train mode).
model.eval()

# Only one batch is needed, so take it straight from the loader's iterator.
example_data, example_targets = next(iter(test_loader))

with torch.no_grad():
    output = model(example_data.to(device))
# Predicted class per image = index of the largest logit.
predicted_digits = output.argmax(dim=1).tolist()

fig, axes = plt.subplots(1, 10, figsize=(10, 2))
for i, ax in enumerate(axes):
    ax.imshow(example_data[i][0].cpu(), cmap='gray')  # channel 0 of image i
    ax.set_title(f"{predicted_digits[i]}")
    ax.axis('off')
plt.show()

# 导出 CNN 模型参数到二进制文件

获取并导出所有需要的模型参数，包括卷积层和全连接层的权重与偏置。各参数按 conv1、conv2、fc1、fc2 的顺序（每层先权重、后偏置）以 float32 原始数据依次写入同一个文件；文件不含头部或形状信息，读取端需按相同的顺序和形状解析。

In [None]:
# Fetch the model's state_dict and list every entry with its tensor shape
state_dict = model.state_dict()
print("模型参数列表:")
for param_name, tensor in state_dict.items():
    print(f"{param_name}: {tensor.shape}")

提取我们需要的参数，包括：
- conv1.weight：第一个卷积层的权重
- conv1.bias：第一个卷积层的偏置
- conv2.weight：第二个卷积层的权重
- conv2.bias：第二个卷积层的偏置
- fc1.weight：第一个全连接层的权重
- fc1.bias：第一个全连接层的偏置
- fc2.weight：第二个全连接层的权重
- fc2.bias：第二个全连接层的偏置

In [None]:
# Pull the weight and bias of each layer out of the state_dict
conv1_weight, conv1_bias = state_dict['conv1.weight'], state_dict['conv1.bias']
conv2_weight, conv2_bias = state_dict['conv2.weight'], state_dict['conv2.bias']
fc1_weight, fc1_bias = state_dict['fc1.weight'], state_dict['fc1.bias']
fc2_weight, fc2_bias = state_dict['fc2.weight'], state_dict['fc2.bias']

# Report the shape of every extracted tensor, one per line
shape_report = [
    f"conv1_weight: {conv1_weight.shape}",
    f"conv1_bias: {conv1_bias.shape}",
    f"conv2_weight: {conv2_weight.shape}",
    f"conv2_bias: {conv2_bias.shape}",
    f"fc1_weight: {fc1_weight.shape}",
    f"fc1_bias: {fc1_bias.shape}",
    f"fc2_weight: {fc2_weight.shape}",
    f"fc2_bias: {fc2_bias.shape}",
]
print("\n".join(shape_report))

In [None]:
# Write every parameter tensor to a single binary file as raw float32 values,
# back to back, in the order listed here (weight before bias for each layer).
export_order = (
    conv1_weight, conv1_bias,
    conv2_weight, conv2_bias,
    fc1_weight, fc1_bias,
    fc2_weight, fc2_bias,
)
with open("parameters_cnn.bin", "wb") as out_bin:
    for tensor in export_order:
        # Move to CPU and cast to float32 before dumping the raw array data.
        tensor.cpu().numpy().astype(np.float32).tofile(out_bin)

print("参数已成功导出到 parameters_cnn.bin")