In [None]:
# 下载数据到data目录
# import os
# from kaggle.api.kaggle_api_extended import KaggleApi

# # 初始化 API
# api = KaggleApi()
# api.authenticate()

# # 下载竞赛数据集
# api.competition_download_files("digit-recognizer", path="./data")


In [None]:
import torch
import numpy as np
from torch.utils.data import DataLoader,Dataset, random_split
from torchvision import transforms
import matplotlib.pyplot as plt
# Silence every Python warning so the notebook output stays readable.
import warnings
warnings.filterwarnings("ignore")
# Matplotlib defaults: draw the minus sign correctly and fix the figure resolution.
plt.rcParams.update({
    'axes.unicode_minus': False,
    'figure.dpi': 100,
})

# Preprocessing applied to every sample before it reaches the network.
#
# Pixel intensities arrive on a 0-255 scale. ToTensor only rescales uint8
# input to 0-1; float input passes through unchanged. Because step 2 casts to
# float32, it must also divide by 255 itself. Otherwise Normalize would apply
# the MNIST mean/std (which are defined on the 0-1 scale) to values as large
# as 255.
transform = transforms.Compose([
    # 1. Fallback: turn a flat 784-element row into a 28x28 image.
    transforms.Lambda(lambda x: x.reshape(28, 28) if x.ndim == 1 else x),
    # 2. Cast to a float32 ndarray and rescale 0-255 intensities to 0-1.
    #    np.asarray also accepts a CPU tensor, so both input kinds end up as
    #    an ndarray, which is what ToTensor expects.
    transforms.Lambda(lambda x: np.asarray(x, dtype=np.float32) / 255.0),
    # 3. ndarray (H, W) -> float tensor (1, H, W).
    transforms.ToTensor(),
    # 4. Standardize with the MNIST mean / std.
    transforms.Normalize((0.1307,), (0.3081,))
])
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
import pandas as pd
class CSVDataset(Dataset):
    """Map-style dataset backed by a CSV file that is read fully into memory.

    Every row is one sample. When ``has_label`` is true the column named
    ``label_col`` is split off as the target and the remaining columns form
    the feature vector; otherwise every column is treated as a feature.
    """

    def __init__(self, csv_file, has_label=False, label_col='label', transform = None):
        """
        :param csv_file: path of the CSV file to load
        :param has_label: True for labelled (training) data, False for unlabelled data
        :param label_col: name of the label column (only read when has_label is True)
        :param transform: optional callable applied to each feature row in __getitem__
        """
        frame = pd.read_csv(csv_file)
        self.data = frame
        self.has_label = has_label
        self.transform = transform

        if not has_label:
            # No target column: the whole frame is feature data.
            self.features = frame.values
            return

        # Separate the target column from the feature columns.
        self.features = frame.drop(columns=[label_col]).values
        self.labels = frame[label_col].values

    def __len__(self):
        """Number of rows in the loaded CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(feature, label)`` for labelled data, otherwise just ``feature``."""
        sample = self.features[idx]
        if self.transform:
            sample = self.transform(sample)

        if not self.has_label:
            return sample

        # Class index as an int64 tensor.
        target = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample, target

In [None]:
batch_size = 32
# Fixed seed so the train / held-out partition is the same on every re-run;
# without it random_split draws a different held-out subset each time and the
# reported metrics are not comparable between runs.
SPLIT_SEED = 42
data = CSVDataset('data/train.csv',  has_label=True, label_col='label', transform=transform)
# 80 % of the labelled rows are used for training, the remainder is held out.
train_size = int(0.8 * len(data))
test_size = len(data)-train_size
train_dataset, test_dataset= random_split(
    data,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED))
# Only the training loader reshuffles between epochs.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# fig = plt.figure()
# for i in range(12):
#     plt.subplot(3, 4, i+1)
#     plt.tight_layout()
#     image, label = train_dataset[i]
#     plt.imshow(image.reshape(28, 28), cmap='gray', interpolation='none')
#     plt.title("Labels: {}".format(label))
#     plt.xticks([])
#     plt.yticks([])
# plt.show()

In [None]:
class Net(torch.nn.Module):
    """Small CNN: two conv stages followed by a two-layer classifier head.

    Each conv stage is Conv2d(3x3, no padding) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2). The first Linear layer expects 64*5*5 features, which is
    what a 1x28x28 input yields (28 -> 26 -> 13 -> 11 -> 5). The head ends in
    10 output units.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = self._conv_stage(1, 32)
        self.conv2 = self._conv_stage(32, 64)
        self.fc = torch.nn.Sequential(
            torch.nn.Linear(64 * 5 * 5, 50),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(50, 10),
        )

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d stage."""
        return torch.nn.Sequential(
            torch.nn.Conv2d(in_channels, out_channels, kernel_size=3),
            torch.nn.BatchNorm2d(out_channels),
            torch.nn.ReLU(inplace=True),
            torch.nn.MaxPool2d(kernel_size=2),
        )

    def forward(self, x):
        """Map a batch of images to one row of 10 class scores per sample."""
        n_samples = x.size(0)
        feature_maps = self.conv2(self.conv1(x))
        # Flatten everything except the batch dimension for the linear head.
        flat = feature_maps.view(n_samples, -1)
        return self.fc(flat)

In [None]:
# Instantiate the network and move it to the selected device.
model = Net().to(device)

# 查看模型结构
# 打印模型参数总数和可训练参数总数
def count_parameters(model):
    """Print the total and the trainable parameter counts of ``model``.

    Both figures are element counts summed over ``model.parameters()``; the
    trainable figure only includes tensors whose ``requires_grad`` is set.
    Nothing is returned.
    """
    total_params = 0
    trainable_params = 0
    for tensor in model.parameters():
        n_elements = tensor.numel()
        total_params += n_elements
        if tensor.requires_grad:
            trainable_params += n_elements
    print(f"模型总参数数量: {total_params:,}")
    print(f"模型可训练参数数量: {trainable_params:,}")

# Show the module structure, then the parameter counts.
print(model)
count_parameters(model)

In [None]:
# Cross-entropy loss, the usual objective for multi-class classification.
loss_fn = torch.nn.CrossEntropyLoss()
# Learning rate used by the optimizer below.
learn_rate = 1e-2
# SGD with momentum over all parameters of the model.
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=learn_rate,
    momentum=0.9,
)


In [None]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader`` and report its metrics.

    Each batch is moved to the module-level ``device``, pushed through
    ``model``, and followed by one ``optimizer`` step on the value returned
    by ``loss_fn``. The function does not switch the model's train/eval mode
    itself.

    Returns:
        ``(accuracy, mean_loss)``: the number of correct predictions divided
        by ``len(dataloader.dataset)``, and the sum of the per-batch losses
        divided by the number of batches.
    """
    n_samples = len(dataloader.dataset)
    n_batches = len(dataloader)
    correct = 0
    running_loss = 0
    for inputs, targets in dataloader:
        inputs, targets = inputs.to(device), targets.to(device)

        # Clear stale gradients, then forward / backward / update.
        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = loss_fn(logits, targets)
        batch_loss.backward()
        optimizer.step()

        hits = logits.argmax(1) == targets
        correct += hits.float().sum().item()
        running_loss += batch_loss.item()
    return correct / n_samples, running_loss / n_batches

In [None]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, test_acc = 0, 0
    for x, y in dataloader:
        x = x.to(device)
        y = y.to(device)
        y_pred = model(x)
        loss = loss_fn(y_pred,y)

        test_acc  += (y_pred.argmax(1) == y).type(torch.float).sum().item()
        test_loss += loss.item()
    test_acc  /= size
    test_loss /= num_batches
    return test_acc, test_loss


In [None]:
def predict(dataloader, model, device=None):
    """Predict a class label for every sample served by an unlabelled loader.

    Args:
        dataloader: iterable yielding feature batches only (no labels).
        model: trained network; it is switched to eval mode and left there.
        device: device to run inference on. When omitted, the device of the
            model's first parameter is used (CPU if the model has no
            parameters), so the inputs land where the weights are.

    Returns:
        1-D numpy array holding one predicted class index per sample, in the
        order the loader produced them. An empty int64 array is returned when
        the loader yields nothing.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()  # inference mode for layers such as Dropout / BatchNorm
    batch_labels = []

    with torch.no_grad():  # no gradients are needed for inference
        for x in dataloader:
            # The model returns raw class scores; argmax over dim 1 picks the class.
            scores = model(x.to(device))
            batch_labels.append(scores.argmax(1).cpu().numpy())

    if not batch_labels:
        return np.array([], dtype=np.int64)
    return np.concatenate(batch_labels)

In [None]:
epochs = 10
# Per-epoch metric history, read by the plotting cell.
train_loss, train_acc = [], []
test_loss, test_acc = [], []
# One progress line per epoch; accuracies are printed as percentages.
template = ('Epoch:{:2d}, Train_acc:{:.1f}%, Train_loss:{:.3f}, Test_acc:{:.1f}%，Test_loss:{:.3f}')
for epoch in range(epochs):
    # Optimisation pass over the training split.
    model.train()
    epoch_train_acc, epoch_train_loss = train(train_loader, model, loss_fn, optimizer)
    train_acc.append(epoch_train_acc)
    train_loss.append(epoch_train_loss)

    # Evaluation pass over the held-out split.
    model.eval()
    epoch_test_acc, epoch_test_loss = test(test_loader, model, loss_fn)
    test_acc.append(epoch_test_acc)
    test_loss.append(epoch_test_loss)

    print(template.format(
        epoch + 1,
        100 * epoch_train_acc, epoch_train_loss,
        100 * epoch_test_acc, epoch_test_loss,
    ))

In [None]:
epochs_range = range(epochs)

# One entry per panel: (title, legend location, [(series, legend label), ...]).
panels = [
    ('Training and Validation Accuracy', 'lower right',
     [(train_acc, 'Training Accuracy'), (test_acc, 'Test Accuracy')]),
    ('Training and Validation Loss', 'upper right',
     [(train_loss, 'Training Loss'), (test_loss, 'Test Loss')]),
]

# Accuracy on the left, loss on the right, both against the epoch index.
fig, axes = plt.subplots(1, 2, figsize=(12, 3))
for ax, (title, legend_loc, curves) in zip(axes, panels):
    for values, label in curves:
        ax.plot(epochs_range, values, label=label)
    ax.legend(loc=legend_loc)
    ax.set_title(title)
plt.show()

In [None]:
# Run the model over the unlabelled CSV and write one prediction per row.
pred_dataset = CSVDataset('data/test.csv', has_label=False, transform=transform)
pred_loader = DataLoader(pred_dataset, shuffle=False, batch_size=batch_size)
result = predict(pred_loader, model)
# Ids are 1-based row numbers, in the same order as the predictions.
image_ids = np.arange(len(result)) + 1
submission_df = pd.DataFrame({'ImageId': image_ids, 'Label': result})
submission_df.to_csv('data/submission.csv', index=False)