In [1]:
import torch
from torchvision import datasets, transforms
import time

# 定义LeNet模型
class LeNet(torch.nn.Module):
    """LeNet-style CNN mapping 1-channel images to 10 class log-probabilities.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, feed
    three fully connected layers. ``fc1`` expects ``16*4*4`` features, which
    is what a 28x28 input produces (28 -> 24 -> 12 -> 8 -> 4).
    """

    def __init__(self):
        super().__init__()
        # Layers are created in a fixed order so that seeded parameter
        # initialisation stays reproducible.
        self.conv1 = torch.nn.Conv2d(1, 6, 5)
        self.conv2 = torch.nn.Conv2d(6, 16, 5)
        self.fc1 = torch.nn.Linear(16*4*4, 120)
        self.fc2 = torch.nn.Linear(120, 84)
        self.fc3 = torch.nn.Linear(84, 10)

    def forward(self, x):
        """Return log-probabilities over the 10 classes for ``x``.

        Args:
            x: Image tensor with one channel and 28x28 spatial size.

        Returns:
            Tensor with 10 columns holding ``log_softmax`` scores along dim 1.

        Raises:
            RuntimeError: If the convolutional stack does not yield a
                ``16x4x4`` feature map (i.e. the input is not 28x28).
        """
        functional = torch.nn.functional
        x = functional.max_pool2d(functional.relu(self.conv1(x)), 2)
        x = functional.max_pool2d(functional.relu(self.conv2(x)), 2)

        # Without this check, view(-1, 256) happily re-partitions any tensor
        # whose element count is a multiple of 256, mixing features of
        # different samples and changing the batch size silently.
        if tuple(x.shape[-3:]) != (16, 4, 4):
            raise RuntimeError(
                "LeNet expects 1x28x28 inputs; got a feature map of shape "
                f"{tuple(x.shape[-3:])} instead of (16, 4, 4)"
            )
        x = x.view(-1, 16*4*4)

        x = functional.relu(self.fc1(x))
        x = functional.relu(self.fc2(x))
        return functional.log_softmax(self.fc3(x), dim=1)

# 定义训练函数
def train_model(workers, epochs, batch_size, target_loss, threshold):
    """Train a fresh LeNet on the MNIST training split and time the run.

    The model is trained with SGD (lr=0.01) on the GPU when one is
    available, otherwise on the CPU. Training stops early as soon as the
    mean epoch loss is at or below ``target_loss`` or differs from the
    previous epoch's mean loss by less than ``threshold``.

    Args:
        workers: Number of DataLoader worker processes.
        epochs: Maximum number of epochs to run; must be at least 1.
        batch_size: Mini-batch size. The incomplete final batch is dropped.
        target_loss: Mean epoch loss at which training stops.
        threshold: Minimum epoch-to-epoch change in mean loss; a smaller
            change stops training.

    Returns:
        A tuple ``(avg_loss, total_time)`` with the mean per-batch loss of
        the last epoch that ran and the seconds spent in the training loop
        (dataset download and loader construction are not timed).

    Raises:
        ValueError: If ``epochs`` is smaller than 1, or if ``batch_size`` is
            so large that the loader yields no batch at all.
    """
    # Fail before the dataset download instead of hitting an unbound
    # ``avg_loss`` after a loop that never ran.
    if epochs < 1:
        raise ValueError(f"epochs must be at least 1, got {epochs}")

    # Fixed seed so every hyperparameter combination starts from the same
    # initial weights.
    torch.manual_seed(123)

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    train_set = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
    train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=batch_size,
        shuffle=True,
        num_workers=workers,
        drop_last=True,
    )
    num_batches = len(train_loader)
    if num_batches == 0:
        # drop_last=True discards the only (incomplete) batch in this case.
        raise ValueError(
            f"batch_size {batch_size} exceeds the dataset size "
            f"{len(train_set)}; no full batch is available"
        )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = LeNet().to(device)

    # LeNet.forward already applies log_softmax, so NLLLoss is the matching
    # criterion; CrossEntropyLoss would apply a second, redundant
    # log_softmax on top of it.
    criterion = torch.nn.NLLLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments.
    start_time = time.perf_counter()
    last_loss = float('inf')
    for epoch in range(epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        avg_loss = running_loss / num_batches
        print(f"Epoch {epoch+1} - Loss: {avg_loss}")

        # Stop once the target is reached or the loss has plateaued.
        if avg_loss <= target_loss or abs(last_loss - avg_loss) < threshold:
            break

        last_loss = avg_loss

    total_time = time.perf_counter() - start_time
    return avg_loss, total_time

# 超参数网格搜索函数
def grid_search(target_loss=0.001, threshold=0.0001, worker_counts=range(1, 9),
                batch_sizes=(32, 64, 128), epochs=25):
    """Search for the fastest configuration that reaches ``target_loss``.

    Every combination of worker count and batch size is trained with
    ``train_model``. Only runs whose final loss is at or below
    ``target_loss`` compete; among those the shortest run wins. Progress
    and the final result are printed.

    Args:
        target_loss: Loss a run must reach to be considered.
        threshold: Plateau threshold forwarded to ``train_model``.
        worker_counts: Iterable of DataLoader worker counts to try.
        batch_sizes: Iterable of batch sizes to try.
        epochs: Maximum number of epochs per run.

    Returns:
        A tuple ``(best_hyperparams, best_time)``. When no run reached
        ``target_loss`` this is ``({}, float('inf'))``.
    """
    # Materialise once so a one-shot iterator is not exhausted after the
    # first worker count.
    batch_sizes = tuple(batch_sizes)

    best_time = float('inf')
    best_hyperparams = {}

    for workers in worker_counts:
        for batch_size in batch_sizes:
            print(f"Workers: {workers}, Epochs: {epochs}, Batch Size: {batch_size}")

            loss, time_taken = train_model(workers, epochs, batch_size, target_loss, threshold)

            # Runs that stopped on the plateau threshold without reaching
            # the target are reported but never selected.
            if loss <= target_loss and time_taken < best_time:
                best_time = time_taken
                best_hyperparams = {
                    'Workers': workers,
                    'Epochs': epochs,
                    'Batch Size': batch_size
                }

            print(f"Loss: {loss}, Time Taken: {time_taken} seconds\n")

    if not best_hyperparams:
        # Avoid reporting an empty dict with an "inf seconds" timing as if
        # it were a valid result.
        print(f"No configuration reached the target loss {target_loss}.")
        return best_hyperparams, best_time

    print("Best Hyperparameters:")
    print(best_hyperparams)
    print(f"Time Taken to Reach Target Loss {target_loss}: {best_time} seconds")
    return best_hyperparams, best_time

# 执行超参数搜索
# Guard the entry point: DataLoader workers (num_workers > 0) are separate
# processes, and under the "spawn" start method each worker re-imports this
# module, which would otherwise start the whole search again.
if __name__ == "__main__":
    grid_search(target_loss=0.001)


Workers: 1, Epochs: 25, Batch Size: 32
Epoch 1 - Loss: 0.6366098105331262
Epoch 2 - Loss: 0.1321791368328035
Epoch 3 - Loss: 0.09062330510181685
Epoch 4 - Loss: 0.07143850276923427
Epoch 5 - Loss: 0.0611407219560196
Epoch 6 - Loss: 0.053008588328616074
Epoch 7 - Loss: 0.047039182400299855
Epoch 8 - Loss: 0.04262174656980981
Epoch 9 - Loss: 0.03705154229432034
Epoch 10 - Loss: 0.03397160649301174
Epoch 11 - Loss: 0.030209059355648545
Epoch 12 - Loss: 0.028058560539673393
Epoch 13 - Loss: 0.025815237969074707
Epoch 14 - Loss: 0.023665540986872898
Epoch 15 - Loss: 0.021983891549300947
Epoch 16 - Loss: 0.020397621619489898
Epoch 17 - Loss: 0.017893221676081886
Epoch 18 - Loss: 0.01753557671927362
Epoch 19 - Loss: 0.015434892570170148
Epoch 20 - Loss: 0.014682896212597068
Epoch 21 - Loss: 0.013039327890813971
Epoch 22 - Loss: 0.013006148626239155
Loss: 0.013006148626239155, Time Taken: 417.03727674484253 seconds

Workers: 1, Epochs: 25, Batch Size: 64
Epoch 1 - Loss: 1.004535394725703
Epoch