# 查看CIFAR-10原始数据格式

In [2]:
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from wangdao_deeplearning_train import EarlyStopping, ModelSaver,train_classification_model,plot_learning_curves
from wangdao_deeplearning_train import evaluate_classification_model as evaluate_model


# 加载数据并处理为tensor

In [3]:
# 加载CIFAR-10数据集
import os
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset

# 定义CIFAR-10数据集类
class CIFAR10Dataset(Dataset):
    """Labelled CIFAR-10 images stored as individual ``<name>.png`` files.

    Args:
        img_dir: Directory that contains the PNG files.
        labels_df: DataFrame whose first column holds the image name (without
            extension) and whose second column holds the text class label.
        transform: Optional callable applied to the loaded PIL image. When it
            is omitted the PIL image itself is returned.

    Raises:
        KeyError: If a label in ``labels_df`` is not one of the ten known
            class names.
    """

    def __init__(self, img_dir, labels_df, transform=None):
        self.img_dir = img_dir
        self.transform = transform

        self.labels_df = labels_df
        # First column: image names, cast to str so they can be joined into a file name.
        self.img_names = self.labels_df.iloc[:, 0].values.astype(str)

        # Text label -> integer class id.
        self.class_names_dict = {'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3,
                                 'deer': 4, 'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9}
        # Second column: text labels, converted to integer ids once, up front.
        self.labels = [self.class_names_dict[label] for label in self.labels_df.iloc[:, 1].values]

    def __len__(self):
        """Number of labelled samples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        ``image`` is the transformed image when a transform was given,
        otherwise the RGB PIL image.
        """
        img_path = os.path.join(self.img_dir, self.img_names[idx] + '.png')
        # Image.open is lazy; convert() forces the pixel data to be read so the
        # file can be closed right away instead of lingering until GC.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        label = self.labels[idx]

        # Previously the result was only bound inside this branch, so a dataset
        # without a transform crashed with UnboundLocalError.
        if self.transform:
            image = self.transform(image)

        return image, label

# Read the label file
img_dir = r"D:\cifar-10\train\train"
labels_file = r"D:\cifar-10\trainLabels.csv"
labels_df = pd.read_csv(labels_file)

# Split the labelled rows: the first train_size rows train the model and the
# following val_size rows validate it. Both bounds are explicit so the size of
# the validation set does not silently depend on how many rows the CSV holds.
train_size = 45000
val_size = 5000
train_df = labels_df.iloc[:train_size]
val_df = labels_df.iloc[train_size:train_size + val_size]

# Per-channel normalisation statistics, defined once and shared by both pipelines
# (presumably measured with cal_mean_std — TODO confirm).
cifar10_mean = (0.4917, 0.4823, 0.4467)
cifar10_std = (0.2024, 0.1995, 0.2010)

# Training preprocessing (with augmentation)
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomRotation(40),      # random rotation
    transforms.RandomHorizontalFlip(),  # random horizontal flip
    transforms.Normalize(cifar10_mean, cifar10_std)
])

# Validation preprocessing (no augmentation)
val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(cifar10_mean, cifar10_std)
])

# Build the training and validation datasets
train_dataset = CIFAR10Dataset(img_dir=img_dir, labels_df=train_df, transform=train_transform)
val_dataset = CIFAR10Dataset(img_dir=img_dir, labels_df=val_df, transform=val_transform)

# Class names, listed in class-id order
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# Report the dataset sizes
print(f"训练集大小: {len(train_dataset)}")
print(f"验证集大小: {len(val_dataset)}")


训练集大小: 45000
验证集大小: 5000


In [4]:
def cal_mean_std(ds):
    """Average the per-image, per-channel mean and standard deviation over ``ds``.

    Every item of ``ds`` is an ``(image, label)`` pair. The statistics are
    reduced over dims 1 and 2 of each image tensor, so dim 0 (the channel
    axis for a ``(C, H, W)`` image) is kept and one value per channel results.

    Returns:
        ``(mean, std)``: the per-image statistics averaged over ``len(ds)``.
    """
    mean_total, std_total = 0., 0.
    for pixels, _label in ds:
        mean_total = mean_total + pixels.mean(dim=(1, 2))
        std_total = std_total + pixels.std(dim=(1, 2))
    n_items = len(ds)
    return mean_total / n_items, std_total / n_items
# cal_mean_std(train_dataset)

# 把数据集划分为训练集45000和验证集5000，并给DataLoader

In [5]:

# Build the data loaders
batch_size = 64

# Training batches are drawn in a freshly shuffled order on every pass over the dataset.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Validation batches keep the dataset order.
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)




# 理解BatchNorm接口（单独示例）

In [6]:
# Stand-alone example for getting familiar with a single layer's API
import torch.nn as nn
m=nn.BatchNorm1d(100)  # batch-norm layer built for 100 features
x=torch.randn(20,100)  # random input of shape (20, 100)
print(m(x).shape)  # the output keeps the input shape: torch.Size([20, 100])

torch.Size([20, 100])


# 搭建模型

In [7]:
import torch.nn as nn
import torch.nn.functional as F

class NeuralNetwork(nn.Module):
    """Convolutional classifier with three conv stages and a two-layer head.

    Each stage is Conv-BatchNorm-ReLU twice, followed by a 2x2 max-pool that
    halves the spatial size. The head flattens 512 feature maps of size 4x4,
    which corresponds to 3-channel 32x32 inputs, and produces 10 logits.
    """

    def __init__(self):
        super().__init__()

        # Convolutional stages (channels: 3 -> 128 -> 256 -> 512)
        self.conv_block1 = self._double_conv(3, 128)
        self.conv_block2 = self._double_conv(128, 256)
        self.conv_block3 = self._double_conv(256, 512)

        # Fully connected head
        self.classifier = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 10),
        )

        # Re-initialise the weights
        self.init_weights()

    @staticmethod
    def _double_conv(in_channels, out_channels):
        """Build one stage: two 3x3 Conv-BatchNorm-ReLU groups, then a 2x2 max-pool."""
        layers = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),  # BatchNorm2d operates on 4-D tensors
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        return nn.Sequential(*layers)

    def init_weights(self):
        """Apply Xavier-uniform init to conv / linear weights and zero their biases."""
        for layer in self.modules():
            if not isinstance(layer, (nn.Conv2d, nn.Linear)):
                continue
            nn.init.xavier_uniform_(layer.weight)
            if layer.bias is not None:
                nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Map a batch of images to a ``(batch, 10)`` tensor of class logits."""
        features = self.conv_block3(self.conv_block2(self.conv_block1(x)))

        # Flatten everything except the batch dimension
        flat = features.view(features.size(0), -1)

        return self.classifier(flat)


In [8]:
# Instantiate the model
model = NeuralNetwork()

# Fetch the first batch from train_loader
dataiter = iter(train_loader)
images, labels = next(dataiter)

# Inspect the shapes of the batch
print("批次图像形状:", images.shape)
print("批次标签形状:", labels.shape)


print('-'*100)
# Run one forward pass
with torch.no_grad():  # gradients are not needed for this shape check
    outputs = model(images)
    

print(outputs.shape)


批次图像形状: torch.Size([64, 3, 32, 32])
批次标签形状: torch.Size([64])
----------------------------------------------------------------------------------------------------
torch.Size([64, 10])


In [9]:
# Count the model's parameters
# Number of parameters that require gradients
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"需要求梯度的参数总量: {total_params}")

# Number of all parameters, whether or not they require gradients
all_params = sum(p.numel() for p in model.parameters())
print(f"模型总参数量: {all_params}")

# Per-parameter breakdown, in named_parameters() order
print("\n各层参数量明细:")
for name, param in model.named_parameters():
    print(f"{name}: {param.numel()} 参数")


需要求梯度的参数总量: 12979850
模型总参数量: 12979850

各层参数量明细:
conv_block1.0.weight: 3456 参数
conv_block1.0.bias: 128 参数
conv_block1.1.weight: 128 参数
conv_block1.1.bias: 128 参数
conv_block1.3.weight: 147456 参数
conv_block1.3.bias: 128 参数
conv_block1.4.weight: 128 参数
conv_block1.4.bias: 128 参数
conv_block2.0.weight: 294912 参数
conv_block2.0.bias: 256 参数
conv_block2.1.weight: 256 参数
conv_block2.1.bias: 256 参数
conv_block2.3.weight: 589824 参数
conv_block2.3.bias: 256 参数
conv_block2.4.weight: 256 参数
conv_block2.4.bias: 256 参数
conv_block3.0.weight: 1179648 参数
conv_block3.0.bias: 512 参数
conv_block3.1.weight: 512 参数
conv_block3.1.bias: 512 参数
conv_block3.3.weight: 2359296 参数
conv_block3.3.bias: 512 参数
conv_block3.4.weight: 512 参数
conv_block3.4.bias: 512 参数
classifier.0.weight: 8388608 参数
classifier.0.bias: 1024 参数
classifier.2.weight: 10240 参数
classifier.2.bias: 10 参数


In [10]:
# Hand check of conv_block2.0.weight's size: 128 input channels * 3x3 kernel * 256 output channels
128*3*3*256

294912

In [11]:
# Display the model's state dict (tensors keyed by name); the printed output is very long
model.state_dict()

OrderedDict([('conv_block1.0.weight',
              tensor([[[[-0.0361, -0.0497, -0.0056],
                        [-0.0465,  0.0258, -0.0437],
                        [ 0.0674, -0.0332,  0.0155]],
              
                       [[ 0.0527,  0.0423,  0.0329],
                        [ 0.0355, -0.0624,  0.0385],
                        [ 0.0679,  0.0537, -0.0115]],
              
                       [[ 0.0533,  0.0504, -0.0355],
                        [ 0.0393,  0.0349, -0.0587],
                        [-0.0083, -0.0600,  0.0376]]],
              
              
                      [[[-0.0005, -0.0500,  0.0654],
                        [-0.0642,  0.0389,  0.0138],
                        [ 0.0567,  0.0325,  0.0399]],
              
                       [[-0.0271,  0.0378,  0.0650],
                        [-0.0653, -0.0702, -0.0525],
                        [-0.0579, -0.0233, -0.0118]],
              
                       [[-0.0613, -0.0157,  0.0604],
                  

# 设置交叉熵损失函数，SGD优化器

In [12]:
model = NeuralNetwork()
# Define the loss function (the optimizer is created in the next cell)
loss_fn = nn.CrossEntropyLoss()  # cross-entropy loss for multi-class classification; it applies softmax internally and takes the integer 0-9 class labels directly, so no manual one-hot encoding is needed

print("损失函数:", loss_fn)



损失函数: CrossEntropyLoss()


In [13]:
model = NeuralNetwork()

optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)  # SGD optimizer with learning rate 0.001 and momentum 0.9

In [14]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"使用设备: {device}")
model = model.to(device) # move the model to the selected device
# NOTE(review): presumably training stops after 5 evaluations without an improvement of at least 0.001 — confirm against wangdao_deeplearning_train
early_stopping=EarlyStopping(patience=5, delta=0.001)
# NOTE(review): presumably only the best checkpoint is kept under model_weights/ — confirm against wangdao_deeplearning_train
model_saver=ModelSaver(save_dir='model_weights', save_best_only=True)


# Train for up to 50 epochs; the call returns the model together with the recorded history
model, history = train_classification_model(model, train_loader, val_loader, loss_fn, optimizer, device, num_epochs=50, early_stopping=early_stopping, model_saver=model_saver, tensorboard_logger=None)



使用设备: cpu
训练开始，共35200步


  0%|          | 0/35200 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Tail of the training history. NOTE(review): the slice ends at -1, so the most recent entry is left out — confirm this is intended
history['train'][-100:-1]

In [None]:
# Tail of the validation history. NOTE(review): the slice ends at -1, so the most recent entry is left out — confirm this is intended
history['val'][-1000:-1]

# 绘制损失曲线和准确率曲线

In [None]:
plot_learning_curves(history, sample_step=500)  # the x-axis is measured in training steps

In [None]:
# 导入所需库
import os
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import tqdm

# 定义测试数据集类
class CIFAR10TestDataset(Dataset):
    """Unlabelled test images stored as ``<id>.png`` files in a single directory."""

    def __init__(self, img_dir, transform=None):
        """
        Set up the test dataset.

        Args:
            img_dir: Directory that contains the test images.
            transform: Optional callable applied to each loaded PIL image.
        """
        self.img_dir = img_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> file mapping reproducible across runs and file systems.
        self.img_files = sorted(f for f in os.listdir(img_dir) if f.endswith('.png'))

    def __len__(self):
        """Number of ``.png`` files found in ``img_dir``."""
        return len(self.img_files)

    def __getitem__(self, idx):
        """Return ``(image, img_id)`` for sample ``idx``.

        ``img_id`` is the file name without its extension, parsed as an int.

        Raises:
            ValueError: If the file name (without extension) is not an integer.
        """
        file_name = self.img_files[idx]
        img_path = os.path.join(self.img_dir, file_name)
        # convert() forces the lazily opened file to be read, so the handle
        # can be closed immediately.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform:
            image = self.transform(image)

        # Image id = file name with the extension stripped
        img_id = int(os.path.splitext(file_name)[0])

        return image, img_id

# 定义预测函数
def predict_test_set(model, img_dir, labels_file, device, batch_size=64,
                     num_workers=0, output_file='cifar10_submission.csv'):
    """
    Predict a class for every test image and write a submission CSV.

    Args:
        model: Trained model; it is expected to already live on ``device``.
        img_dir: Directory containing the test images.
        labels_file: Path of the submission template. Kept for backward
            compatibility; the submission is built from the predictions alone,
            so the template is no longer read.
        device: Device the image batches are moved to.
        batch_size: Number of images per batch.
        num_workers: Number of DataLoader worker processes. Defaults to 0
            (load in the main process): worker processes have to re-import the
            dataset class, which tends to fail or hang for classes defined
            inside a notebook on spawn-based platforms such as Windows.
        output_file: Path of the CSV file to write.
    """
    # Preprocessing: same normalisation as training, but without augmentation
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4917, 0.4823, 0.4467), (0.2024, 0.1995, 0.2010))
    ])

    # Test dataset and loader
    test_dataset = CIFAR10TestDataset(img_dir, transform=transform)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    # Switch the model to evaluation mode
    model.eval()

    # Class names, listed in class-id order
    class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

    # Collected as parallel lists rather than a dict keyed by id, so that the
    # duplicate-id check below can actually detect duplicates.
    ids = []
    labels = []

    print("正在预测测试集...")
    with torch.no_grad():
        for images, img_ids in tqdm.tqdm(test_loader, desc="预测进度"):
            images = images.to(device)
            # Index of the largest logit = predicted class id
            predicted = model(images).argmax(dim=1)

            ids.extend(img_ids.tolist())
            labels.extend(class_names[class_id] for class_id in predicted.tolist())

    # Build the submission table, ordered by id
    submission_df = pd.DataFrame({'id': ids, 'label': labels}).sort_values(by='id')

    # Report whether any id occurs more than once
    has_duplicates = submission_df['id'].duplicated().any()
    print(f"id列是否有重复值: {has_duplicates}")

    # Save the predictions
    submission_df.to_csv(output_file, index=False)
    print(f"预测完成，结果已保存至 {output_file}")

# Run prediction on the test set
# NOTE: this rebinds the img_dir / labels_file globals that previously pointed at the training data
img_dir = r"D:\cifar-10\test\test"
labels_file = r"D:\cifar-10\sampleSubmission.csv"
predict_test_set(model, img_dir, labels_file, device, batch_size=128)


正在预测测试集...


预测进度:   0%|          | 0/2344 [00:00<?, ?it/s]