In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, TensorDataset
from tensorboardX import SummaryWriter
import time

# Select the compute device.
# The second GPU ("cuda:1") is preferred, but only when the machine really has
# more than one GPU; with a single GPU we fall back to "cuda:0", and with no
# usable GPU at all we run on the CPU. Asking for "cuda:1" on a single-GPU
# host would otherwise fail later with an invalid device ordinal error.
if torch.cuda.is_available():
    gpu_index = 1 if torch.cuda.device_count() > 1 else 0
    device = torch.device(f"cuda:{gpu_index}")
else:
    device = torch.device("cpu")
print(device)

# Number of worker processes intended for data loading.
num_workers = 16
# Wall-clock timestamp taken when the notebook run starts.
start_time = time.time()

cuda:1


In [15]:

# 定义前馈神经网络模型
class Classifier(nn.Module):
    """Fully connected classifier with two hidden layers.

    The network is ``fc1 -> activation -> fc2 -> activation -> fc3``; the
    final layer returns raw scores (no activation is applied to them).

    Args:
        input_num: Number of input features per sample.
        hidden_num: Width of both hidden layers.
        output_num: Number of output scores per sample.
        activation: Callable (e.g. an ``nn.Module``) applied after ``fc1``
            and after ``fc2``.
    """

    def __init__(self, input_num, hidden_num, output_num, activation):
        super().__init__()
        self.activation = activation

        # Layers are created in fc1, fc2, fc3 order so parameter registration
        # (and random initialisation order) stays stable.
        self.fc1 = nn.Linear(input_num, hidden_num)
        self.fc2 = nn.Linear(hidden_num, hidden_num)
        self.fc3 = nn.Linear(hidden_num, output_num)

    def forward(self, x):
        """Return the output of ``fc3`` for the input batch ``x``."""
        hidden = self.fc1(x)
        # Each remaining layer consumes the activated output of the previous one.
        for layer in (self.fc2, self.fc3):
            hidden = layer(self.activation(hidden))
        return hidden

# Hyperparameters
input_size = 28 * 28  # number of input features (flattened 28x28 image)
hidden_size = 512  # width of each hidden layer
num_classes = 10  # number of target classes
learning_rate = 0.001  # learning rate shared by all optimizers
num_epochs = 20  # number of training epochs
batch_size = 64  # mini-batch size

# Load the datasets
train_dataset = MNIST(root='~/Datasets/MNIST', train=True, transform=ToTensor(), download=True)
test_dataset = MNIST(root='~/Datasets/MNIST', train=False, transform=ToTensor(), download=False)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)


# Create one model instance per activation function under comparison
model_relu = Classifier(input_size, hidden_size, num_classes, activation=nn.ReLU()).to(device)
model_sigmoid = Classifier(input_size, hidden_size, num_classes, activation=nn.Sigmoid()).to(device)
model_tanh = Classifier(input_size, hidden_size, num_classes, activation=nn.Tanh()).to(device)


# Loss function and optimizers.
# Every optimizer uses the `learning_rate` hyperparameter declared above, so
# the three runs stay comparable and the rate is tuned in a single place
# (previously each optimizer hard-coded lr=0.01 and ignored the constant).
criterion = nn.CrossEntropyLoss()
optimizer_relu = optim.Adam(model_relu.parameters(), lr=learning_rate)
optimizer_sigmoid = optim.Adam(model_sigmoid.parameters(), lr=learning_rate)
optimizer_tanh = optim.Adam(model_tanh.parameters(), lr=learning_rate)

# TensorBoardX writers, one log directory per model
writer_relu = SummaryWriter('logs_relu')
writer_sigmoid = SummaryWriter('logs_sigmoid')
writer_tanh = SummaryWriter('logs_tanh')

# Define the training function
def train_model(model, optimizer, writer=None):
    """Train ``model`` for ``num_epochs`` epochs, evaluating after every epoch.

    Relies on the module-level ``train_loader``, ``test_loader``,
    ``criterion``, ``device``, ``input_size`` and ``num_epochs``.

    Args:
        model: Network to train; called on batches reshaped to
            ``(-1, input_size)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        writer: Optional logger exposing ``add_scalar(tag, value, step)``
            (e.g. a TensorBoardX ``SummaryWriter``). When given, the epoch's
            average training loss is logged under ``'Loss'`` and the test
            accuracy under ``'Accuracy'``, both with the epoch index as step.
            When ``None`` nothing is logged.

    Returns:
        None. Progress is printed every 100 training steps and after each
        evaluation pass.
    """
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0  # sample-weighted sum of batch losses for this epoch
        total_samples = 0

        for batch_idx, (data, targets) in enumerate(train_loader):
            data = data.view(-1, input_size).to(device)
            targets = targets.to(device)

            # Forward pass
            outputs = model(data)
            loss = criterion(outputs, targets)

            # Backward pass and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Weight the batch loss by the batch size so that a smaller final
            # batch does not skew the epoch average.
            batch_loss = loss.item()
            batch_count = targets.size(0)
            total_loss += batch_loss * batch_count
            total_samples += batch_count

            if (batch_idx + 1) % 100 == 0:
                print(f'Epoch [{epoch+1}/{num_epochs}], Step [{batch_idx+1}/{len(train_loader)}], Loss: {batch_loss:.4f}')

        # An empty loader yields NaN instead of raising ZeroDivisionError.
        avg_loss = total_loss / total_samples if total_samples else float('nan')
        if writer is not None:
            writer.add_scalar('Loss', avg_loss, epoch)

        # Evaluate on the test set
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for data, targets in test_loader:
                data = data.view(-1, input_size).to(device)
                targets = targets.to(device)
                outputs = model(data)
                predicted = outputs.argmax(dim=1)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()

        accuracy = correct / total if total else float('nan')
        if writer is not None:
            writer.add_scalar('Accuracy', accuracy, epoch)
        print(f'Test Accuracy: {accuracy:.4f}')



# Train each model with its own optimizer and TensorBoardX writer.
training_runs = [
    (model_relu, optimizer_relu, writer_relu),
    (model_sigmoid, optimizer_sigmoid, writer_sigmoid),
    (model_tanh, optimizer_tanh, writer_tanh),
]

try:
    for run_model, run_optimizer, run_writer in training_runs:
        train_model(run_model, run_optimizer, run_writer)
finally:
    # Always close the TensorBoardX writers, even if training fails part-way,
    # so the writers are not left open.
    for _, _, run_writer in training_runs:
        run_writer.close()


RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.