In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
import matplotlib.pyplot as plt
import numpy as np
import os
from datetime import datetime
import json

# Render figures with the non-interactive Agg backend so the script also
# works on headless servers (no display attached).
plt.switch_backend('agg')

# Run on the first CUDA GPU when one is available; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Image preprocessing: convert to a tensor, then normalize every channel
# with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

batch_size = 4  # Consider increasing to 32-128 for better GPU utilization

# CIFAR-10 training split (download=True fetches it into ./dataset).
trainset = torchvision.datasets.CIFAR10(
    root='./dataset', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=True,
    num_workers=2, pin_memory=True)

# CIFAR-10 test split; iterated in a fixed order (shuffle=False).
testset = torchvision.datasets.CIFAR10(
    root='./dataset', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False,
    num_workers=2, pin_memory=True)


# Network architecture definition
class Net(nn.Module):
    """Small LeNet-style CNN producing 10 class scores per image.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, feed
    three fully connected layers. Dropout is applied to the activations of
    the first fully connected layer. The ``16 * 5 * 5`` input size of ``fc1``
    matches the pooled feature map obtained from 3x32x32 inputs.

    Args:
        dropout_rate: Drop probability of the dropout layer after ``fc1``.
    """

    def __init__(self, dropout_rate=0.5):
        super().__init__()
        # Convolutional feature extractor: 3 -> 6 -> 16 channels.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Classifier head: 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.dropout = nn.Dropout(p=dropout_rate)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return the raw (unnormalized) class scores for the batch ``x``."""
        features = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=(2, 2))
        features = F.max_pool2d(F.relu(self.conv2(features)), kernel_size=2)
        # Flatten everything except the batch dimension.
        flat = features.view(features.size(0), -1)
        hidden = self.dropout(F.relu(self.fc1(flat)))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


def predict(testloader, net, device):
    """
    Evaluate ``net`` on ``testloader`` and return its accuracy in percent.

    The model is switched to evaluation mode (which disables Dropout) and is
    left in that mode; a caller that continues training has to call
    ``net.train()`` again.

    Args:
        testloader: Iterable of batches whose first item is the input batch
            and whose second item is the tensor of class labels.
        net: Model mapping an input batch to per-class scores.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Accuracy over all samples as a float between 0 and 100. Returns 0.0
        when the loader yields no samples.
    """
    correct = 0  # number of correctly classified samples (int or 0-dim tensor)
    total = 0    # number of samples seen

    # Evaluation mode: Dropout becomes a no-op.
    net.eval()

    with torch.no_grad():  # no gradients are needed for inference
        for data in testloader:
            # 1. Fetch the batch and move it to the target device.
            images, labels = data[0].to(device), data[1].to(device)
            # 2. Forward pass.
            outputs = net(images)
            # 3. The predicted class is the index of the largest score.
            _, predicted = torch.max(outputs, 1)
            # 4. Update the counters.
            total += labels.size(0)
            correct += (predicted == labels).sum()

    if total == 0:
        # An empty loader leaves `correct` as a plain int and `total` at zero;
        # report 0.0 instead of failing on `.item()` or dividing by zero.
        accuracy = 0.0
    else:
        # int() accepts both a plain int and a 0-dim tensor.
        accuracy = 100 * int(correct) / total
    print('测试集中的准确率为: %d %%' % accuracy)
    return accuracy


def train_and_evaluate(trainloader, testloader, hyperparams, save_path, device):
    """
    Train a fresh ``Net`` with one hyperparameter configuration and evaluate it.

    The model is optimized with SGD on the cross-entropy loss. After every
    epoch it is evaluated on ``testloader`` through ``predict``; one more
    evaluation is run after the last epoch.

    Args:
        trainloader: Iterable of training batches (inputs first, labels second).
        testloader: Iterable of test batches, passed on to ``predict``.
        hyperparams: Dict providing 'num_epochs', 'learning_rate',
            'momentum', 'weight_decay' and 'dropout_rate'.
        save_path: Output directory. Accepted for the caller's convenience;
            this function does not read or write it.
        device: Device the model and every batch are moved to.

    Returns:
        Dict with the keys 'hyperparams' (the dict passed in),
        'train_losses' (loss of every batch), 'epoch_train_losses'
        (mean batch loss per epoch), 'test_accuracies' (accuracy after each
        epoch) and 'final_accuracy'.
    """
    # Unpack the configuration.
    num_epochs = hyperparams['num_epochs']
    lr = hyperparams['learning_rate']
    momentum = hyperparams['momentum']
    weight_decay = hyperparams['weight_decay']
    dropout_rate = hyperparams['dropout_rate']

    # Model, loss and optimizer.
    net = Net(dropout_rate=dropout_rate).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(
        net.parameters(),
        lr=lr,
        momentum=momentum,
        weight_decay=weight_decay,
    )

    # Training history.
    train_losses = []
    test_accuracies = []
    epoch_train_losses = []

    banner = '=' * 70
    print(f"\n{banner}")
    print("开始训练 - 超参数配置:")
    print(f"  学习率(lr): {lr}")
    print(f"  训练轮数(num_epochs): {num_epochs}")
    print(f"  动量(momentum): {momentum}")
    print(f"  L2正则化(weight_decay): {weight_decay}")
    print(f"  Dropout率(dropout_rate): {dropout_rate}")
    print(f"{banner}\n")

    log_every = 2000  # batches between two progress lines

    for epoch_number in range(1, num_epochs + 1):
        # predict() leaves the model in eval mode, so re-enable training mode.
        net.train()
        window_loss = 0.0   # loss accumulated since the last progress line
        loss_sum = 0.0      # loss accumulated over the whole epoch
        batches_seen = 0

        for batch in trainloader:
            inputs = batch[0].to(device)
            labels = batch[1].to(device)

            optimizer.zero_grad()
            loss = criterion(net(inputs), labels)
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            train_losses.append(batch_loss)
            window_loss += batch_loss
            loss_sum += batch_loss
            batches_seen += 1

            if batches_seen % log_every == 0:
                print(f'Epoch {epoch_number}, Batch {batches_seen:5d}, '
                      f'Loss: {window_loss / log_every:.3f}')
                window_loss = 0.0

        # Mean batch loss of this epoch.
        avg_epoch_loss = loss_sum / batches_seen
        epoch_train_losses.append(avg_epoch_loss)

        # Evaluate on the test set after every epoch.
        accuracy = predict(testloader, net, device)
        test_accuracies.append(accuracy)

        print(f'Epoch {epoch_number} - 训练损失: {avg_epoch_loss:.3f}, '
              f'测试准确率: {accuracy:.2f}%')

    # Final evaluation of the trained model.
    print('\n最终测试:')
    final_accuracy = predict(testloader, net, device)

    return {
        'hyperparams': hyperparams,
        'train_losses': train_losses,
        'epoch_train_losses': epoch_train_losses,
        'test_accuracies': test_accuracies,
        'final_accuracy': final_accuracy,
    }


def run_hyperparameter_experiments(device):
    """
    Run the full set of hyperparameter experiments and save their results.

    Every experiment is the baseline configuration with a few values
    overridden. Each one is trained and evaluated with
    ``train_and_evaluate`` on the module-level ``trainloader`` and
    ``testloader``. All results are written to
    ``./hyperparameter_experiments/all_results.json``.

    Args:
        device: Device passed on to ``train_and_evaluate``.

    Returns:
        List with one result dict per experiment, in execution order; each
        dict additionally carries the key 'experiment_name'.
    """
    # Baseline experiment; every other experiment overrides some of its values.
    baseline = {
        'name': 'benchmark_configuration',
        'num_epochs': 5,
        'learning_rate': 0.001,
        'momentum': 0.9,
        'weight_decay': 0.01,
        'dropout_rate': 0.5,
    }
    overrides = [
        # Experiment 1: 10x learning rate
        {'name': 'high_learning_rate', 'learning_rate': 0.01},
        # Experiment 2: 0.1x learning rate
        {'name': 'low_learning_rate', 'learning_rate': 0.0001},
        # Experiment 3: twice as many epochs
        {'name': 'more epoch', 'num_epochs': 10},
        # Experiment 4: no L2 regularization and no dropout
        {'name': 'without regularization', 'weight_decay': 0, 'dropout_rate': 0},
        # Experiment 5: 10x L2 regularization and a higher dropout rate
        {'name': 'strengthend regularization', 'weight_decay': 0.1, 'dropout_rate': 0.7},
        # Experiment 6: no momentum
        {'name': 'without momentum', 'momentum': 0},
    ]
    # Merging onto the baseline keeps the baseline's key order in every config.
    experiments = [baseline] + [{**baseline, **delta} for delta in overrides]

    # Run all experiments.
    all_results = []
    save_path = './hyperparameter_experiments'
    os.makedirs(save_path, exist_ok=True)

    rule = '#' * 70
    experiment_count = len(experiments)
    for number, config in enumerate(experiments, start=1):
        print(f"\n\n{rule}")
        print(f"实验 {number}/{experiment_count}: {config['name']}")
        print(rule)

        outcome = train_and_evaluate(trainloader, testloader, config, save_path, device)
        outcome['experiment_name'] = config['name']
        all_results.append(outcome)

    # Persist the results of every experiment.
    results_file = os.path.join(save_path, 'all_results.json')
    with open(results_file, 'w') as f:
        json.dump(all_results, f, indent=4)
    print(f"\n所有实验结果已保存至: {results_file}")

    return all_results


def _plot_comparison(all_results, plot_path):
    """Draw the 2x2 cross-experiment comparison figure and save it to ``plot_path``."""
    names = [r['experiment_name'] for r in all_results]
    final_accs = [r['final_accuracy'] for r in all_results]
    final_losses = [r['epoch_train_losses'][-1] for r in all_results]
    colors = plt.cm.viridis(np.linspace(0, 1, len(names)))

    fig = plt.figure(figsize=(15, 10))

    # Panel 1: mean training loss per epoch.
    plt.subplot(2, 2, 1)
    for result in all_results:
        epochs = range(1, len(result['epoch_train_losses']) + 1)
        plt.plot(epochs, result['epoch_train_losses'],
                 marker='o', label=result['experiment_name'])
    plt.xlabel('Epoch')
    plt.ylabel('Average Training Loss')
    plt.title('train loss comparison (each Epoch)')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.grid(True, alpha=0.3)

    # Panel 2: test accuracy per epoch.
    plt.subplot(2, 2, 2)
    for result in all_results:
        epochs = range(1, len(result['test_accuracies']) + 1)
        plt.plot(epochs, result['test_accuracies'],
                 marker='s', label=result['experiment_name'])
    plt.xlabel('Epoch')
    plt.ylabel('Test Accuracy (%)')
    plt.title('test accuracy comparison')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.grid(True, alpha=0.3)

    # Panel 3: bar chart of the final test accuracy.
    plt.subplot(2, 2, 3)
    bars = plt.bar(range(len(names)), final_accs, color=colors)
    plt.xlabel('experiment configurations')
    plt.ylabel('Final Test Accuracy (%)')
    plt.title('Final Test Accuracy Comparison')
    plt.xticks(range(len(names)), names, rotation=45, ha='right')
    plt.grid(True, alpha=0.3, axis='y')

    # Write each value slightly above its bar.
    for bar, acc in zip(bars, final_accs):
        plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.5,
                 f'{acc:.2f}%', ha='center', va='bottom', fontsize=9)

    # Panel 4: last-epoch training loss against final accuracy.
    plt.subplot(2, 2, 4)
    plt.scatter(final_losses, final_accs, s=200, c=colors, alpha=0.6)
    for name, loss, acc in zip(names, final_losses, final_accs):
        plt.annotate(name, (loss, acc), fontsize=8, ha='right')
    plt.xlabel('final training loss')
    plt.ylabel('final test accuracy (%)')
    plt.title('Final Loss vs Final Accuracy')
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    fig.savefig(plot_path, dpi=300, bbox_inches='tight')
    plt.close(fig)  # release the figure's memory


def _plot_training_curves(all_results, plot_path, cols=3, window_size=100):
    """Draw one smoothed per-batch loss curve per experiment and save the grid."""
    # Size the grid to the number of experiments so that none is dropped
    # (a fixed 2x3 grid silently omitted the seventh experiment).
    rows = max(1, (len(all_results) + cols - 1) // cols)
    fig, axes = plt.subplots(rows, cols, figsize=(6 * cols, 5 * rows),
                             squeeze=False)

    for ax, result in zip(axes.flatten(), all_results):
        train_losses = result['train_losses']
        if len(train_losses) > window_size:
            # Moving average to smooth the noisy per-batch loss.
            kernel = np.ones(window_size) / window_size
            curve = np.convolve(train_losses, kernel, mode='valid')
        else:
            curve = train_losses
        ax.plot(curve, color='blue', alpha=0.8, linewidth=1.5)

        # English title, consistent with every other plot label. CJK text is
        # not covered by Matplotlib's default font, which produces
        # missing-glyph warnings and empty boxes in the saved image.
        ax.set_title(f"{result['experiment_name']}\n"
                     f"Final accuracy: {result['final_accuracy']:.2f}%")
        ax.set_xlabel('Batch')
        ax.set_ylabel('Loss')
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    fig.savefig(plot_path, dpi=300, bbox_inches='tight')
    plt.close(fig)  # release the figure's memory


def _print_summary_table(all_results):
    """Print one table row per experiment with its settings and final metrics."""
    # Widen the name column to the longest experiment name (at least 15).
    name_width = max([15] + [len(r['experiment_name']) for r in all_results])
    rule_width = max(80, name_width + 68)

    print("\n" + "=" * rule_width)
    print("实验结果汇总表")
    print("=" * rule_width)
    print(f"{'实验名称':<{name_width}} {'学习率':<10} {'Epoch':<8} {'L2正则':<10} "
          f"{'Dropout':<10} {'最终损失':<12} {'最终准确率':<12}")
    print("-" * rule_width)

    for result in all_results:
        hp = result['hyperparams']
        # The percent sign is attached to the number instead of following the
        # column padding (previously rendered as "50.68       %").
        print(f"{result['experiment_name']:<{name_width}} "
              f"{hp['learning_rate']:<10.4f} "
              f"{hp['num_epochs']:<8} "
              f"{hp['weight_decay']:<10.4f} "
              f"{hp['dropout_rate']:<10.2f} "
              f"{result['epoch_train_losses'][-1]:<12.4f} "
              f"{result['final_accuracy']:.2f}%")
    print("=" * rule_width)


def visualize_results(all_results, save_path='./hyperparameter_experiments'):
    """
    Plot and summarize the results of all experiments.

    Writes two images into ``save_path`` (created if missing) and prints a
    summary table:

    * ``hyperparameter_comparison.png``: per-epoch training loss, per-epoch
      test accuracy, final accuracy as bars, and final loss vs. final accuracy.
    * ``detailed_training_curves.png``: one per-batch loss curve for every
      experiment, smoothed with a 100-batch moving average when the run has
      more than 100 batches.

    Args:
        all_results: List of result dicts. Each needs the keys
            'experiment_name', 'hyperparams', 'train_losses',
            'epoch_train_losses' (non-empty), 'test_accuracies' and
            'final_accuracy'.
        save_path: Directory the images are written to.
    """
    os.makedirs(save_path, exist_ok=True)

    # Figure 1: comparison across configurations.
    plot_path = os.path.join(save_path, 'hyperparameter_comparison.png')
    _plot_comparison(all_results, plot_path)
    print(f'\n对比图已保存至: {plot_path}')

    # Figure 2: detailed training curve of every experiment.
    detail_plot_path = os.path.join(save_path, 'detailed_training_curves.png')
    _plot_training_curves(all_results, detail_plot_path)
    print(f'详细训练曲线已保存至: {detail_plot_path}')

    # Summary table on stdout.
    _print_summary_table(all_results)


# Main program: run every experiment, then plot and summarize the results.
if __name__ == '__main__':
    print("开始超参数调优实验...")
    print("这将运行多组实验，可能需要较长时间，请耐心等待。\n")
    
    # Run all experiments
    all_results = run_hyperparameter_experiments(device)
    
    # Visualize the results
    print("\n\n生成可视化结果...")
    visualize_results(all_results)
    
    print("\n实验完成！")
    print("所有结果已保存在 './hyperparameter_experiments' 目录下")

Using device: cuda:0
Files already downloaded and verified
Files already downloaded and verified
开始超参数调优实验...
这将运行多组实验，可能需要较长时间，请耐心等待。



######################################################################
实验 1/7: 基准配置
######################################################################

开始训练 - 超参数配置:
  学习率(lr): 0.001
  训练轮数(num_epochs): 5
  动量(momentum): 0.9
  L2正则化(weight_decay): 0.01
  Dropout率(dropout_rate): 0.5

Epoch 1, Batch  2000, Loss: 2.291
Epoch 1, Batch  4000, Loss: 2.048
Epoch 1, Batch  6000, Loss: 1.857
Epoch 1, Batch  8000, Loss: 1.776
Epoch 1, Batch 10000, Loss: 1.716
Epoch 1, Batch 12000, Loss: 1.673
测试集中的准确率为: 41 %
Epoch 1 - 训练损失: 1.883, 测试准确率: 41.35%
Epoch 2, Batch  2000, Loss: 1.606
Epoch 2, Batch  4000, Loss: 1.596
Epoch 2, Batch  6000, Loss: 1.585
Epoch 2, Batch  8000, Loss: 1.558
Epoch 2, Batch 10000, Loss: 1.570
Epoch 2, Batch 12000, Loss: 1.569
测试集中的准确率为: 47 %
Epoch 2 - 训练损失: 1.579, 测试准确率: 47.09%
Epoch 3, Batch  2000, Loss: 1.516
Epoch 3, Batch  4000, Loss

  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(plot_path, dpi=300, bbox_inches='tight')
  plt.savefig(plot_path, dpi=300, bbox_inches='tight')
  plt.savefig(plot_path, dpi=300, bbox_inches='tight')
  plt.savefig(plot_path, dpi=300, bbox_inches='tight')
  plt.savefig(plot_path, dpi=300, bbox_inches='tight')
  plt.savefig(plot_path, dpi=300, bbox_inches='tight')
  plt.savefig(plot_path, dpi=300, bbox_inches='tight')
  plt.savefig(plot_path, dpi=300, bbox_inches='tight')
  plt.savefig(plot_path, dpi=300, bbox_inches='tight')
  plt.savefig(plot_path, dpi=300, bbox_inches='tight')
  plt.savefig(plot_path, dpi=300, bbox_inches='tight')
  plt.savefig(plo


对比图已保存至: ./hyperparameter_experiments/hyperparameter_comparison.png


  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(detail_plot_path, dpi=300, bbox_inches='tight')
  plt.savefig(detail_plot_path, dpi=300, bbox_inches='tight')
  plt.savefig(detail_plot_path, dpi=300, bbox_inches='tight')
  plt.savefig(detail_plot_path, dpi=300, bbox_inches='tight')
  plt.savefig(detail_plot_path, dpi=300, bbox_inches='tight')
  plt.savefig(detail_plot_path, dpi=300, bbox_inches='tight')
  plt.savefig(detail_plot_path, dpi=300, bbox_inches='tight')
  plt.savefig(detail_plot_path, dpi=300, bbox_inches='tight')
  plt.savefig(detail_plot_path, dpi=300, bbox_inches='tight')
  plt.savefig(detail_plot_path, dpi=300, bb

详细训练曲线已保存至: ./hyperparameter_experiments/detailed_training_curves.png

实验结果汇总表
实验名称            学习率        Epoch    L2正则       Dropout    最终损失         最终准确率       
--------------------------------------------------------------------------------
基准配置            0.0010     5        0.0100     0.50       1.4336       50.68       %
高学习率            0.0100     5        0.0100     0.50       2.0633       22.29       %
低学习率            0.0001     5        0.0100     0.50       1.6921       40.21       %
更多epoch         0.0010     10       0.0100     0.50       1.3792       54.93       %
无正则化            0.0010     5        0.0000     0.00       1.0241       61.35       %
强正则化            0.0010     5        0.1000     0.70       2.3029       10.00       %
无动量             0.0010     5        0.0100     0.50       1.7448       36.51       %

实验完成！
所有结果已保存在 './hyperparameter_experiments' 目录下
