# 导入依赖并查看预训练Inception v3模型结构（本笔记使用CIFAR-10数据集）

In [1]:
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from wangdao_deeplearning_train import EarlyStopping, ModelSaver,train_classification_model,plot_learning_curves
from wangdao_deeplearning_train import evaluate_classification_model as evaluate_model


In [2]:
import torchvision.models as models
# 导入必要的库
import torch.nn as nn
import torchvision.models as models
from torchsummary import torchsummary
from torchviz import make_dot

# Load the pretrained InceptionNet model (Inception v3)
inception_model = models.inception_v3(pretrained=True)

# Print the model structure
print("InceptionNet模型结构:")
print(inception_model)

# Move the model to the GPU when one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
inception_model = inception_model.to(device)

# Print per-layer parameter statistics with torchsummary
print("\nInceptionNet模型参数统计:")
torchsummary.summary(inception_model, (3, 299, 299))  # Inception v3 is fed 299x299 inputs here

# Switch to evaluation mode before the single-sample forward pass
# (the original author notes this avoids batch-norm errors and makes the
# model return a plain tensor rather than an object with a `logits` attribute).
inception_model.eval()
dummy_input = torch.randn(1, 3, 299, 299).to(device)
# Autograd must stay enabled for this forward pass: make_dot builds the picture
# by walking `output.grad_fn`. Running it under torch.no_grad() leaves grad_fn
# as None, so the rendered image would contain no model graph at all.
output = inception_model(dummy_input)

# Render the autograd graph of the model with torchviz
model_graph = make_dot(output, params=dict(inception_model.named_parameters()))
model_graph.render("inception_model", format="png")





InceptionNet模型结构:
Inception3(
  (Conv2d_1a_3x3): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2a_3x3): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2b_3x3): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv2d_3b_1x1): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_4a_3x3): BasicConv2d(
    (conv): Conv2d(80, 192, kernel

'inception_model.png'

# 加载数据并处理为tensor

In [3]:
# 加载CIFAR-10数据集
import os
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset

# 定义CIFAR-10数据集类
class CIFAR10Dataset(Dataset):
    """Labelled CIFAR-10 images stored as ``<name>.png`` files plus a label table.

    Args:
        img_dir: Directory containing the PNG images.
        labels_df: DataFrame whose first column holds the image names (without
            extension) and whose second column holds the class name.
        transform: Optional callable applied to each loaded PIL image. When it
            is omitted the RGB PIL image itself is returned.

    Raises:
        KeyError: At construction time, if a label is not one of the ten
            known class names.
    """

    def __init__(self, img_dir, labels_df, transform=None):
        self.img_dir = img_dir
        self.transform = transform

        self.labels_df = labels_df
        # First column: image names, forced to str so they can be joined into a path.
        self.img_names = self.labels_df.iloc[:, 0].values.astype(str)

        # Class name -> integer id lookup table.
        self.class_names_dict = {'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3,
                                 'deer': 4, 'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9}
        # Convert the text labels (second column) into integer ids.
        self.labels = [self.class_names_dict[label] for label in self.labels_df.iloc[:, 1].values]

    def __len__(self):
        """Return the number of labelled samples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``."""
        img_path = os.path.join(self.img_dir, self.img_names[idx] + '.png')
        # Force three channels, matching how CIFAR10TestDataset loads its images.
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]

        # Previously the result was only bound inside this branch, so a dataset
        # built without a transform crashed with UnboundLocalError on access.
        if self.transform:
            image = self.transform(image)

        return image, label

# Read the label table
img_dir = r"D:\cifar-10\train\train"
labels_file = r"D:\cifar-10\trainLabels.csv"
labels_df = pd.read_csv(labels_file)

# Split by row position: the first train_size rows train, all remaining rows validate
train_size = 45000
val_size = 5000
train_df, val_df = labels_df.iloc[:train_size], labels_df.iloc[train_size:]

# Per-channel normalisation statistics shared by both pipelines
CIFAR10_MEAN = (0.4917, 0.4823, 0.4467)
CIFAR10_STD = (0.2024, 0.1995, 0.2010)

# Training preprocessing (includes image augmentation)
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomRotation(40),  # random rotation
    transforms.RandomHorizontalFlip(),  # random horizontal flip
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])

# Validation preprocessing (no augmentation)
val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])

# Build the training and validation datasets over the same image directory
train_dataset = CIFAR10Dataset(img_dir, train_df, transform=train_transform)
val_dataset = CIFAR10Dataset(img_dir, val_df, transform=val_transform)

# Class names, listed in label-id order
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# Report the dataset sizes
print(f"训练集大小: {len(train_dataset)}")
print(f"验证集大小: {len(val_dataset)}")


训练集大小: 45000
验证集大小: 5000


In [4]:
def cal_mean_std(ds):
    """Average per-image statistics over every sample of a dataset.

    For each ``(img, _)`` pair the mean and standard deviation are reduced over
    dims 1 and 2 of ``img``, leaving one value per index of dim 0. Those
    per-image values are then averaged over the dataset, so the second result
    is the mean of the per-image standard deviations, not a pooled one.

    Args:
        ds: Sized iterable of ``(img, label)`` pairs.
            Assumes ``img`` is a (channels, height, width) tensor — TODO confirm.

    Returns:
        Tuple ``(mean, std)`` of the averaged statistics.

    Raises:
        ZeroDivisionError: If ``ds`` is empty.
    """
    mean_sum = 0.
    std_sum = 0.
    for img, _ in ds:
        mean_sum = mean_sum + img.mean(dim=(1, 2))
        std_sum = std_sum + img.std(dim=(1, 2))
    count = len(ds)
    return mean_sum / count, std_sum / count
# cal_mean_std(train_dataset)

# 把数据集划分为训练集（45000）和验证集（5000），并交给DataLoader加载

In [5]:

# Build the data loaders
batch_size = 64

# Training batches are drawn in a freshly shuffled order on every pass
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Validation batches keep the dataset order
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)




# 搭建模型

In [6]:
#理解每个接口的方法，单独写例子
import torch.nn as nn
# Standalone BatchNorm1d example: 100 features applied to a batch of 20 samples
m = nn.BatchNorm1d(100)
x = torch.randn(20, 100)
normalized = m(x)
print(normalized.shape)

torch.Size([20, 100])


# 搭建InceptionNet模型

In [16]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# 定义Inception模块
class InceptionBlock(nn.Module):
    """Four parallel branches whose outputs are concatenated along dim 1.

    Args:
        in_channels: Channels of the incoming feature map.
        out1x1: Output channels of the plain 1x1 convolution branch.
        red3x3: Channels after the 1x1 reduction in the 3x3 branch.
        out3x3: Output channels of the 3x3 branch.
        red5x5: Channels after the 1x1 reduction in the 5x5 branch.
        out5x5: Output channels of the 5x5 branch.
        pool_proj: Output channels of the 1x1 projection after max pooling.

    The result has ``out1x1 + out3x3 + out5x5 + pool_proj`` channels; every
    branch uses stride 1 with size-preserving padding.
    """

    def __init__(self, in_channels, out1x1, red3x3, out3x3, red5x5, out5x5, pool_proj):
        super().__init__()

        # Branch 1: a bare 1x1 convolution (no normalisation or activation).
        self.branch1 = nn.Conv2d(in_channels, out1x1, kernel_size=1)

        # Branch 2: 1x1 reduction followed by a padded 3x3 convolution.
        self.branch2 = nn.Sequential(
            *self._conv_bn_relu(in_channels, red3x3, kernel_size=1),
            *self._conv_bn_relu(red3x3, out3x3, kernel_size=3, padding=1)
        )

        # Branch 3: 1x1 reduction followed by a padded 5x5 convolution.
        self.branch3 = nn.Sequential(
            *self._conv_bn_relu(in_channels, red5x5, kernel_size=1),
            *self._conv_bn_relu(red5x5, out5x5, kernel_size=5, padding=2)
        )

        # Branch 4: size-preserving max pooling followed by a 1x1 projection.
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            *self._conv_bn_relu(in_channels, pool_proj, kernel_size=1)
        )

    @staticmethod
    def _conv_bn_relu(in_ch, out_ch, **conv_kwargs):
        """Return the layer triple Conv2d -> BatchNorm2d -> ReLU as a list."""
        return [
            nn.Conv2d(in_ch, out_ch, **conv_kwargs),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        ]

    def forward(self, x):
        """Run all four branches on ``x`` and concatenate their outputs on dim 1."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        return torch.cat([branch(x) for branch in branches], 1)

# 定义InceptionNet模型
class InceptionNetV1(nn.Module):
    """Small Inception-style classifier: conv stem, two Inception blocks, MLP head.

    The feature extractor halves the spatial size three times (one pooling in
    the stem and one after each Inception block). The classifier's first linear
    layer is fixed at ``480 * 4 * 4`` inputs, which matches 32x32 input images.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Stem: 3 -> 64 channels, then 2x spatial downsampling.
        self.conv_block1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        # First Inception block: 64 -> 64 + 128 + 32 + 32 = 256 channels.
        self.inception1 = InceptionBlock(
            in_channels=64, out1x1=64, red3x3=96, out3x3=128,
            red5x5=16, out5x5=32, pool_proj=32
        )
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Second Inception block: 256 -> 128 + 192 + 96 + 64 = 480 channels.
        self.inception2 = InceptionBlock(
            in_channels=256, out1x1=128, red3x3=128, out3x3=192,
            red5x5=32, out5x5=96, pool_proj=64
        )
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fully connected head with dropout before each linear layer.
        flat_features = 480 * 4 * 4
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(flat_features, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Return class logits of shape ``[batch_size, num_classes]``.

        For 32x32 inputs the feature map goes
        [B, 64, 16, 16] -> [B, 256, 16, 16] -> [B, 256, 8, 8]
        -> [B, 480, 8, 8] -> [B, 480, 4, 4] before being flattened.
        """
        feature_stages = (
            self.conv_block1,
            self.inception1,
            self.maxpool1,
            self.inception2,
            self.maxpool2,
        )
        for stage in feature_stages:
            x = stage(x)

        # Flatten everything except the batch dimension, then classify.
        flat = torch.flatten(x, 1)
        return self.classifier(flat)

In [17]:
# Instantiate the model
model = InceptionNetV1()

# Pull the first batch out of train_loader
dataiter = iter(train_loader)
images, labels = next(dataiter)

# Inspect the batch shapes
print("批次图像形状:", images.shape)
print("批次标签形状:", labels.shape)

print('-' * 100)

# Forward pass only; no gradients are needed for a shape check
with torch.no_grad():
    outputs = model(images)

print(outputs.shape)


批次图像形状: torch.Size([64, 3, 32, 32])
批次标签形状: torch.Size([64])
----------------------------------------------------------------------------------------------------
torch.Size([64, 10])


In [18]:
# Count the model's parameters
# Parameters that require gradients
trainable_counts = [p.numel() for p in model.parameters() if p.requires_grad]
total_params = sum(trainable_counts)
print(f"需要求梯度的参数总量: {total_params}")

# All parameters, whether or not they require gradients
every_count = [p.numel() for p in model.parameters()]
all_params = sum(every_count)
print(f"模型总参数量: {all_params}")

# Per-parameter breakdown
print("\n各层参数量明细:")
for name, param in model.named_parameters():
    size = param.numel()
    print(f"{name}: {size} 参数")


需要求梯度的参数总量: 4467162
模型总参数量: 4467162

各层参数量明细:
conv_block1.0.weight: 1728 参数
conv_block1.0.bias: 64 参数
conv_block1.1.weight: 64 参数
conv_block1.1.bias: 64 参数
inception1.branch1.weight: 4096 参数
inception1.branch1.bias: 64 参数
inception1.branch2.0.weight: 6144 参数
inception1.branch2.0.bias: 96 参数
inception1.branch2.1.weight: 96 参数
inception1.branch2.1.bias: 96 参数
inception1.branch2.3.weight: 110592 参数
inception1.branch2.3.bias: 128 参数
inception1.branch2.4.weight: 128 参数
inception1.branch2.4.bias: 128 参数
inception1.branch3.0.weight: 1024 参数
inception1.branch3.0.bias: 16 参数
inception1.branch3.1.weight: 16 参数
inception1.branch3.1.bias: 16 参数
inception1.branch3.3.weight: 12800 参数
inception1.branch3.3.bias: 32 参数
inception1.branch3.4.weight: 32 参数
inception1.branch3.4.bias: 32 参数
inception1.branch4.1.weight: 2048 参数
inception1.branch4.1.bias: 32 参数
inception1.branch4.2.weight: 32 参数
inception1.branch4.2.bias: 32 参数
inception2.branch1.weight: 32768 参数
inception2.branch1.bias: 128 参数
inception2.bra

In [19]:
# Scratch arithmetic: the product 128 * 3 * 3 * 256.
# NOTE(review): presumably the weight count of a 3x3 convolution between 128 and
# 256 channels — confirm which layer this was meant to cross-check.
128*3*3*256

294912

In [20]:
# Display the model's state_dict (an ordered mapping from names such as
# 'conv_block1.0.weight' to tensors). The full dump is very long.
model.state_dict()

OrderedDict([('conv_block1.0.weight',
              tensor([[[[ 0.0375,  0.0589,  0.1060],
                        [ 0.0226,  0.0641, -0.1756],
                        [-0.0808, -0.1497, -0.0865]],
              
                       [[ 0.0120, -0.1131, -0.1155],
                        [ 0.0952,  0.1804,  0.0533],
                        [-0.1544,  0.1605, -0.1621]],
              
                       [[-0.0135, -0.0190,  0.1226],
                        [ 0.1026,  0.0314, -0.1888],
                        [-0.0146, -0.1098, -0.0836]]],
              
              
                      [[[-0.0396, -0.0010, -0.0621],
                        [ 0.1182, -0.1910, -0.1833],
                        [ 0.1923,  0.0996, -0.1482]],
              
                       [[-0.0978, -0.1694,  0.0198],
                        [-0.1735,  0.0722, -0.1030],
                        [-0.0924, -0.1452, -0.0474]],
              
                       [[-0.0486,  0.1584, -0.0466],
                  

# 设置交叉熵损失函数，SGD优化器

In [21]:
model = InceptionNetV1()
# Define the loss function
loss_fn = nn.CrossEntropyLoss()  # Cross-entropy loss for multi-class classification; it applies log-softmax internally and takes integer class labels (0-9) directly, so no explicit one-hot encoding is needed

print("损失函数:", loss_fn)



损失函数: CrossEntropyLoss()


In [22]:
model = InceptionNetV1()

optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)  # SGD optimizer with learning rate 0.001 and momentum 0.9

In [23]:
# Use the first CUDA device when available, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"使用设备: {device}")
model = model.to(device)  # move the model onto the selected device

# Training helpers imported from wangdao_deeplearning_train
early_stopping = EarlyStopping(patience=5, delta=0.001)
model_saver = ModelSaver(save_dir='model_weights', save_best_only=True)

# Train for up to 50 epochs; returns the model and its training history
model, history = train_classification_model(
    model,
    train_loader,
    val_loader,
    loss_fn,
    optimizer,
    device,
    num_epochs=50,
    early_stopping=early_stopping,
    model_saver=model_saver,
    tensorboard_logger=None,
)



使用设备: cpu
训练开始，共35200步


  0%|          | 0/35200 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Show the tail of the training history: index -100 up to, but not including, the last entry.
# NOTE(review): the `-1` stop drops the most recent record — confirm whether `[-100:]` was intended.
history['train'][-100:-1]

In [None]:
# Show the tail of the validation history: index -1000 up to, but not including, the last entry.
# NOTE(review): the `-1` stop drops the most recent record — confirm whether `[-1000:]` was intended.
history['val'][-1000:-1]

# 绘制损失曲线和准确率曲线

In [None]:
# Plot the learning curves recorded in `history`.
plot_learning_curves(history, sample_step=500)  # the x-axis is steps

In [None]:
# 导入所需库
import os
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import tqdm

# 定义测试数据集类
class CIFAR10TestDataset(Dataset):
    """Unlabelled test images read from a directory of ``.png`` files.

    Each item is ``(image, img_id)``, where ``img_id`` is the integer parsed
    from the file name with its extension removed.
    """

    def __init__(self, img_dir, transform=None):
        """
        Set up the test dataset.

        Args:
            img_dir: Directory containing the test images.
            transform: Optional preprocessing applied to each loaded image.
        """
        self.img_dir = img_dir
        self.transform = transform
        # Keep only PNG entries, in whatever order os.listdir reports them.
        self.img_files = list(filter(lambda fname: fname.endswith('.png'), os.listdir(img_dir)))

    def __len__(self):
        """Return the number of PNG files found."""
        return len(self.img_files)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` as RGB and return it with its id."""
        file_name = self.img_files[idx]
        image = Image.open(os.path.join(self.img_dir, file_name)).convert('RGB')

        if self.transform:
            image = self.transform(image)

        # The image id is the file name without its extension, as an integer.
        stem, _ext = os.path.splitext(file_name)
        return image, int(stem)

# 定义预测函数
def predict_test_set(model, img_dir, labels_file, device, batch_size=64,
                     output_file='cifar10_submission.csv'):
    """Predict a class for every test image and write a submission CSV.

    Args:
        model: Trained classifier; this function switches it to eval mode.
        img_dir: Directory holding the test ``.png`` images.
        labels_file: Path of the submission template. Accepted only for
            backward compatibility: the template used to be read and then
            immediately discarded, so it is no longer opened.
        device: Device the image batches are moved to before the forward pass.
        batch_size: Number of images per batch.
        output_file: Destination CSV path; defaults to the previously
            hard-coded ``cifar10_submission.csv``.

    The CSV has the columns ``id`` and ``label`` and is sorted by ``id``.
    """
    # Preprocessing without augmentation: tensor conversion plus normalisation.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4917, 0.4823, 0.4467), (0.2024, 0.1995, 0.2010))
    ])

    # Build the test dataset and its loader.
    test_dataset = CIFAR10TestDataset(img_dir, transform=transform)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Put the model in evaluation mode.
    model.eval()

    # Maps image id -> predicted class index.
    predictions = {}

    # Show a tqdm progress bar while predicting.
    print("正在预测测试集...")
    with torch.no_grad():
        for images, img_ids in tqdm.tqdm(test_loader, desc="预测进度"):
            images = images.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)  # index of the largest logit is the prediction

            # A batch holds several images, so record them one by one.
            for img_id, pred in zip(img_ids, predicted):
                predictions[img_id.item()] = pred.item()

    # Class names, indexed by class id.
    class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

    # Convert numeric predictions into class names.
    labeled_predictions = {img_id: class_names[pred] for img_id, pred in predictions.items()}

    # Build the submission table directly from the predictions.
    submission_df = pd.DataFrame({
        'id': list(labeled_predictions.keys()),
        'label': list(labeled_predictions.values())
    })

    # Sort by the id column.
    submission_df = submission_df.sort_values(by='id')

    # Report whether the id column contains duplicates. The ids come from dict
    # keys, so file names that parse to the same integer have already collapsed.
    has_duplicates = submission_df['id'].duplicated().any()
    print(f"id列是否有重复值: {has_duplicates}")

    # Save the predictions.
    submission_df.to_csv(output_file, index=False)
    print(f"预测完成，结果已保存至 {output_file}")

# Run prediction on the test set.
# Note: this rebinds img_dir and labels_file, which earlier in the notebook
# pointed at the training images and the training label file.
img_dir = r"D:\cifar-10\test\test"
labels_file = r"D:\cifar-10\sampleSubmission.csv"
predict_test_set(model, img_dir, labels_file, device, batch_size=128)
