<font color="red">注</font>: 使用 tensorboard 可视化需要安装 tensorflow (TensorBoard依赖于tensorflow库，可以任意安装tensorflow的gpu/cpu版本)

```shell
pip install tensorflow-cpu
```

In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
from tqdm.auto import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F

# Report the interpreter version followed by the version of each core library.
print(sys.version_info)
for module in (mpl, np, pd, sklearn, torch):
    print(module.__name__, module.__version__)

# Use the first CUDA device when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

# Seed value used for reproducible random operations in this notebook.
seed = 42


sys.version_info(major=3, minor=12, micro=3, releaselevel='final', serial=0)
matplotlib 3.10.0
numpy 2.0.2
pandas 2.2.3
sklearn 1.6.0
torch 2.5.1+cpu
cpu


## 数据准备

In [4]:
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import random_split

# Fashion-MNIST image-classification dataset, stored under ./data.
# ToTensor is applied to every image when a sample is fetched.
train_ds = datasets.FashionMNIST(root="data", train=True, download=True, transform=ToTensor())
test_ds = datasets.FashionMNIST(root="data", train=False, download=True, transform=ToTensor())

# torchvision does not ship a train/validation split for this dataset, so
# split the training data 55000 / 5000 (an 11 : 1 ratio) with a seeded
# generator to make the split reproducible.
split_generator = torch.Generator().manual_seed(seed)
train_ds, val_ds = random_split(train_ds, [55000, 5000], generator=split_generator)

In [5]:
from torchvision.transforms import Normalize

# Iterate over a dataset and average the per-image channel statistics.
def cal_mean_std(ds):
    """Return the dataset-averaged per-image mean and standard deviation.

    For every ``(image, label)`` pair, the image is reduced over dims 1 and 2,
    leaving one value per entry of dim 0 (the channel axis for C x H x W
    images). The per-image values are summed and divided by ``len(ds)``.

    Note that the second value is the average of the per-image standard
    deviations, not the standard deviation taken over all pixels at once.

    :param ds: sized iterable of ``(image, label)`` pairs with tensor images.
    :return: tuple ``(mean, std)``.
    """
    mean_sum = 0.
    std_sum = 0.
    for image, _label in ds:
        mean_sum = mean_sum + image.mean(dim=(1, 2))
        std_sum = std_sum + image.std(dim=(1, 2))
    count = len(ds)
    return mean_sum / count, std_sum / count


# print(cal_mean_std(train_ds))
# Recorded result of the call above: 0.2860, 0.3205
# NOTE(review): the constants below (0.2856 / 0.3202) differ slightly from the
# recorded values - confirm which split they were computed on.
# Standardise each channel with the given mean and standard deviation.
transforms = nn.Sequential(Normalize(mean=[0.2856], std=[0.3202]))

In [6]:
# Fetch the first item of the training split as an (image, label) pair.
img, label = train_ds[0]
# Bare expression: the notebook displays the image shape and the label.
img.shape, label

(torch.Size([1, 28, 28]), 9)

In [8]:
from torch.utils.data.dataloader import DataLoader

# Mini-batch size shared by all three loaders.
batch_size = 32

# Wrap each dataset split in a DataLoader. Only the training loader shuffles;
# validation and test data are read in their stored order.
train_loader = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(dataset=val_ds, batch_size=batch_size, shuffle=False, num_workers=4)
test_loader = DataLoader(dataset=test_ds, batch_size=batch_size, shuffle=False, num_workers=4)

## 定义模型

In [9]:
# Scratch arithmetic: 128 * 9 = 1152, which equals the 128 * 3 * 3 input size of fc1 below.
128*9

1152

In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

class CNN(nn.Module):
    """Convolutional classifier: three conv-conv-pool stages, then two linear layers.

    Every convolution uses a 3x3 kernel with padding 1, so only the shared
    2x2 max-pool changes the spatial size. The network maps single-channel
    28x28 images to 10 output scores.
    """

    def __init__(self, activation="relu"):
        """
        Create the layers and initialise their parameters.

        :param activation: name of the activation function. "relu" selects
            ``F.relu``; any other value selects ``F.selu``.
        """
        super().__init__()

        # Pick the activation function applied after every conv layer and fc1.
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.selu

        # Stage 1: 1 -> 32 -> 32 channels (single-channel input).
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        # 2x2 max pooling with stride 2; reused after every stage.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Stage 2: 32 -> 64 -> 64 channels.
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        # Stage 3: 64 -> 128 -> 128 channels.
        self.conv5 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        # Collapse all non-batch dimensions into one.
        self.flatten = nn.Flatten()
        # Classifier head: 128 * 3 * 3 features -> 128 -> 10 classes.
        self.fc1 = nn.Linear(128 * 3 * 3, 128)
        self.fc2 = nn.Linear(128, 10)

        self.init_weights()

    def init_weights(self):
        """
        Re-initialise every Linear and Conv2d layer: Xavier-uniform weights, zero biases.
        """
        for layer in self.modules():
            if not isinstance(layer, (nn.Linear, nn.Conv2d)):
                continue
            nn.init.xavier_uniform_(layer.weight)
            nn.init.zeros_(layer.bias)

    def forward(self, x):
        """
        Run the forward pass, printing the feature-map shape after each pooling step.

        :param x: input tensor of shape (batch_size, 1, 28, 28)
        :return: output tensor of shape (batch_size, 10)
        """
        act = self.activation

        # Stage 1: 1 * 28 * 28 -> 32 * 14 * 14
        x = act(self.conv1(x))
        x = act(self.conv2(x))
        x = self.pool(x)
        print(x.shape)

        # Stage 2: 32 * 14 * 14 -> 64 * 7 * 7
        x = act(self.conv3(x))
        x = act(self.conv4(x))
        x = self.pool(x)
        print(x.shape)

        # Stage 3: 64 * 7 * 7 -> 128 * 3 * 3
        x = act(self.conv5(x))
        x = act(self.conv6(x))
        x = self.pool(x)
        print(x.shape)

        # Head: 128 * 3 * 3 -> 1152 -> 128 -> 10
        flat = self.flatten(x)
        hidden = act(self.fc1(flat))
        return self.fc2(hidden)

# Print the number of elements in every parameter tensor of a freshly built model.
for key, value in CNN().named_parameters():
    print(f"{key}\t参数数量: {np.prod(value.shape)}")


conv1.weight	参数数量: 288
conv1.bias	参数数量: 32
conv2.weight	参数数量: 9216
conv2.bias	参数数量: 32
conv3.weight	参数数量: 18432
conv3.bias	参数数量: 64
conv4.weight	参数数量: 36864
conv4.bias	参数数量: 64
conv5.weight	参数数量: 73728
conv5.bias	参数数量: 128
conv6.weight	参数数量: 147456
conv6.bias	参数数量: 128
fc1.weight	参数数量: 147456
fc1.bias	参数数量: 128
fc2.weight	参数数量: 1280
fc2.bias	参数数量: 10


In [11]:
# Scratch arithmetic: 3 * 3 kernel * 32 input channels * 32 output channels = 9216,
# the conv2.weight count printed above.
3*3*32    *32

9216

In [None]:
# Exercise: observe how kernel size, padding and stride change the output shape.
class CNN1(nn.Module):
    """Practice variant of the CNN whose first convolution is 5x5, padding 2, stride 2.

    ``forward`` applies only ``conv1`` and the activation, then prints the
    resulting shape. The remaining layers are constructed but not applied.

    NOTE(review): with the stride-2 ``conv1`` a 28x28 input is already 14x14
    after the first layer, and three 2x2 poolings would reduce it to 1x1.
    ``fc1`` expects 128 * 3 * 3 inputs, so it must be resized before the
    remaining layers are wired into ``forward``.
    """

    def __init__(self, activation="relu"):
        """
        Create the layers and initialise their parameters.

        :param activation: name of the activation function. "relu" selects
            ``F.relu``; any other value selects ``F.selu``.
        """
        # The zero-argument form binds to CNN1 itself. The previous
        # super(CNN, self) call named an unrelated class and failed at
        # construction time.
        super().__init__()
        self.activation = F.relu if activation == "relu" else F.selu
        # Single input channel; out_channels is the number of kernels.
        # Kernel 5, padding 2, stride 2 maps 28x28 to 14x14.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, padding=2, stride=2)
        # 3x3 kernel with padding 1 keeps height and width unchanged.
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)  # 2x2 pooling window, stride 2
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1)
        self.flatten = nn.Flatten()
        # Sized for 128 * 3 * 3 feature maps (see the class-level note).
        self.fc1 = nn.Linear(128 * 3 * 3, 128)
        self.fc2 = nn.Linear(128, 10)  # 10 output classes

        self.init_weights()

    def init_weights(self):
        """Initialise Linear and Conv2d weights with Xavier-uniform values and zero the biases."""
        for m in self.modules():
            if isinstance(m, (nn.Linear, nn.Conv2d)):
                nn.init.xavier_uniform_(m.weight)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        """
        Apply ``conv1`` and the activation, print the output shape, and return the feature map.

        :param x: input tensor; a (batch_size, 1, 28, 28) input gives a
            (batch_size, 32, 14, 14) result.
        :return: the activated output of ``conv1``.
        """
        # 1 * 28 * 28 -> 32 * 14 * 14 (stride 2 halves height and width)
        x = self.activation(self.conv1(x))
        print(x.shape)
        return x


In [12]:
# Build the model with the ReLU activation.
activation = "relu"
model = CNN(activation)
# model.to(device)
# Run one forward pass on a random tensor shaped like a single 1 x 28 x 28 image.
# The bare call makes the notebook display the returned tensor.
img = torch.randn(1, 1, 28, 28)
model(img)

torch.Size([1, 32, 14, 14])
torch.Size([1, 64, 7, 7])
torch.Size([1, 128, 3, 3])


tensor([[-0.0228, -0.1328, -0.0574, -0.0596,  0.0018,  0.0389,  0.0346, -0.0524,
         -0.1359, -0.0186]], grad_fn=<AddmmBackward0>)

In [None]:

class CNN(nn.Module):
    """Convolutional classifier that prints the feature-map shape after every conv and pool step.

    Three stages of two 3x3 convolutions (padding 1) plus a shared 2x2
    max-pool are followed by a flatten step and two linear layers.
    """

    def __init__(self, activation="relu"):
        """
        Create the layers and initialise their parameters.

        :param activation: name of the activation function. "relu" selects
            ``F.relu``; any other value selects ``F.selu``.
        """
        super().__init__()
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.selu

        # Single input channel; out_channels is the number of kernels.
        # 3x3 kernels with padding 1 keep height and width unchanged.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        # 2x2 pooling window with stride 2, shared by all stages.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.flatten = nn.Flatten()
        # For 1 x 28 x 28 inputs the last pooled map is 128 x 3 x 3.
        self.fc1 = nn.Linear(128 * 3 * 3, 128)
        self.fc2 = nn.Linear(128, 10)  # 10 output classes

        self.init_weights()

    def init_weights(self):
        """Initialise Linear and Conv2d weights with Xavier-uniform values and zero the biases."""
        for layer in self.modules():
            if not isinstance(layer, (nn.Linear, nn.Conv2d)):
                continue
            nn.init.xavier_uniform_(layer.weight)
            nn.init.zeros_(layer.bias)

    def forward(self, x):
        """
        Run the forward pass, printing the tensor shape after each conv and pool step.

        For a (batch_size, 1, 28, 28) input the per-sample shapes are:
        32 * 28 * 28 (twice) -> 32 * 14 * 14 -> 64 * 14 * 14 (twice) -> 64 * 7 * 7
        -> 128 * 7 * 7 (twice) -> 128 * 3 * 3 -> 1152 -> 128 -> 10.

        :param x: input tensor of shape (batch_size, 1, 28, 28)
        :return: output tensor of shape (batch_size, 10)
        """
        act = self.activation
        stages = (
            (self.conv1, self.conv2),
            (self.conv3, self.conv4),
            (self.conv5, self.conv6),
        )
        for stage in stages:
            # Two activated convolutions, then the shared pooling layer.
            for conv in stage:
                x = act(conv(x))
                print(x.shape)
            x = self.pool(x)
            print(x.shape)

        flat = self.flatten(x)
        hidden = act(self.fc1(flat))
        return self.fc2(hidden)


# Print the number of elements in every parameter tensor of a freshly built model.
for key, value in CNN().named_parameters():
    print(f"{key}\tparamerters num: {np.prod(value.shape)}")


In [None]:
# Build the model with the ReLU activation.
activation = "relu"
model = CNN(activation)
# The device transfer and the random-input forward pass are left disabled here.
# model.to(device)
# img = torch.randn(1, 1, 28, 28)
# model(img)