In [1]:
import pandas as pd
import torch
from torch.utils.data import TensorDataset, DataLoader

In [2]:
# Load the train and test splits; both CSV files are read without a header row.
train_path, test_path = "mnist_train.csv", "mnist_test.csv"
train_data, test_data = (
    pd.read_csv(csv_file, header=None) for csv_file in (train_path, test_path)
)

In [3]:

# Separate labels from features: column 0 holds the label, every remaining
# column is a feature value.
# Features are divided by 255.0 (normalisation; presumably 8-bit pixel
# intensities, so the result lies in [0, 1] -- confirm against the CSV).
train_pixels = train_data.iloc[:, 1:]
test_pixels = test_data.iloc[:, 1:]

x_train = train_pixels.values / 255.0
y_train = train_data.iloc[:, 0].values

x_test = test_pixels.values / 255.0
y_test = test_data.iloc[:, 0].values

In [4]:
# Convert the arrays to tensors: features become float32, labels become
# int64 (torch.long).
x_train, x_test = (
    torch.tensor(features, dtype=torch.float32) for features in (x_train, x_test)
)
y_train, y_test = (
    torch.tensor(targets, dtype=torch.long) for targets in (y_train, y_test)
)

In [5]:
# Build the datasets and their DataLoaders. Only the training loader shuffles;
# the test loader keeps the default (unshuffled) order.
batch_size = 64

train_dataset = TensorDataset(x_train, y_train)
test_dataset = TensorDataset(x_test, y_test)

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

### 构建神经网络


In [9]:
import torch.nn as nn

class MNIST(nn.Module):
    """Fully connected classifier with two hidden layers.

    With the default arguments the network is 784 -> 512 -> 256 -> 10, using
    ReLU activations and dropout (p=0.2) after each hidden layer. All sizes
    are keyword parameters so the same class can be reused for other input
    widths or class counts; calling ``MNIST()`` builds the default network.

    Args:
        in_features: Size of the last dimension of the input tensor.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        num_classes: Number of output scores produced per sample.
        dropout_p: Drop probability passed to ``nn.Dropout``.
    """

    def __init__(
        self,
        in_features: int = 784,
        hidden1: int = 512,
        hidden2: int = 256,
        num_classes: int = 10,
        dropout_p: float = 0.2,
    ):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, num_classes)
        # One Dropout module is reused after both hidden layers to limit
        # overfitting; it has no parameters, so sharing it is safe.
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, x):
        """Compute the raw class scores (logits) for ``x``.

        Args:
            x: Float tensor whose last dimension equals ``in_features``.

        Returns:
            Tensor whose last dimension equals ``num_classes``. No softmax is
            applied here.
        """
        x = self.dropout(torch.relu(self.fc1(x)))
        x = self.dropout(torch.relu(self.fc2(x)))
        return self.fc3(x)


In [14]:
# Imports needed by the training stage, placed before first use.
import torch.optim as optim
from tqdm.auto import tqdm

# Seed the global RNG so that weight initialisation, DataLoader shuffling and
# dropout masks repeat on a fresh "Restart & Run All". GPU kernels may still
# introduce small run-to-run differences.
torch.manual_seed(42)

# Initialise the model on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MNIST().to(device)
print(model)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
epochs = 10

# Mean training loss per sample, one entry per epoch.
train_losses = []
for epoch in range(epochs):
    model.train()  # training mode, so dropout is active
    running_loss = 0.0
    samples_seen = 0

    for inputs, labels in tqdm(train_loader, desc=f'Epoch {epoch + 1}'):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # CrossEntropyLoss returns the mean over the batch. Weight it by the
        # batch size so a smaller final batch does not skew the epoch average.
        batch_count = labels.size(0)
        running_loss += loss.item() * batch_count
        samples_seen += batch_count

    epoch_loss = running_loss / samples_seen
    train_losses.append(epoch_loss)
    print(f'Epoch {epoch + 1} Loss: {epoch_loss:.4f}')

MNIST(
  (fc1): Linear(in_features=784, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=10, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)


Epoch 1:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 1 Loss: 0.2648


Epoch 2:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 2 Loss: 0.1103


Epoch 3:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 3 Loss: 0.0779


Epoch 4:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 4 Loss: 0.0610


Epoch 5:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 5 Loss: 0.0503


Epoch 6:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 6 Loss: 0.0441


Epoch 7:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 7 Loss: 0.0416


Epoch 8:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 8 Loss: 0.0314


Epoch 9:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 9 Loss: 0.0306


Epoch 10:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 10 Loss: 0.0293


### 测试模型

In [15]:
# Measure accuracy on the test loader. eval() switches the model to inference
# mode, which disables its dropout layer.
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        # The predicted class is the index of the largest logit. no_grad()
        # already keeps these tensors out of autograd, so the legacy `.data`
        # accessor is not needed, and nothing is bound to `_` (which IPython
        # uses for the last cell output).
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Test Accuracy: {accuracy:.2f}%')

Test Accuracy: 98.30%
