# In [3]:
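# CNN training/tuning experiments (extended implementation): CIFAR-10, ResNet-18, a custom CNN, and a comparison of optimizers and learning-rate schedules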

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet18
import time
import os
import matplotlib.pyplot as plt
from ptflops import get_model_complexity_info

# Hyperparameters shared by every experiment
batch_size = 16  # samples per mini-batch
epochs = 50      # training epochs per experiment
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# CIFAR-10 augmentation pipelines, datasets and data loaders

# Training pipeline: random 32x32 crop (after 4-pixel padding) and random
# horizontal flip, then tensor conversion and per-channel normalization
# with mean 0.5 and std 0.5.
transform_train = transforms.Compose([
    transforms.RandomCrop(size=32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Test pipeline: no augmentation, same tensor conversion and normalization.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Both splits live under ./data and are downloaded on first use.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform_train,
)
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform_test,
)

# Only the training loader shuffles; both use two worker processes.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

# Custom CNN architecture
class CustomCNN(nn.Module):
    """Small convolutional classifier for 3-channel 32x32 images.

    The feature extractor has two stages (64 then 128 channels). Each stage is
    two 3x3 convolutions with batch norm and ReLU, followed by a 2x2 max pool.
    The classifier head is sized for an 8x8x128 feature map, which is what a
    32x32 input yields after the two pooling steps; other input resolutions
    will fail at the first linear layer.

    Args:
        num_classes: Number of output logits. Defaults to 10, the value that
            was previously hard-coded.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Attribute names and layer order are kept stable so that saved
        # state_dict keys ("conv.N.*", "fc.N.*") remain loadable.
        self.conv = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(),
            nn.Conv2d(64, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.BatchNorm2d(128), nn.ReLU(),
            nn.Conv2d(128, 128, 3, padding=1), nn.BatchNorm2d(128), nn.ReLU(), nn.MaxPool2d(2)
        )
        self.fc = nn.Sequential(
            nn.Linear(8 * 8 * 128, 256), nn.ReLU(), nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for input ``x``."""
        x = self.conv(x)
        # flatten() falls back to a copy when the activation is not contiguous
        # (e.g. channels_last inputs), where view() would raise.
        x = torch.flatten(x, 1)
        return self.fc(x)

# Experiment grid: optimizer names and learning-rate scheduler names
optimizers = [
    'sgd',
    'adam',
    'rmsprop',
]
schedulers = [
    'step',
    'cosine',
    'plateau',
]

# Experiment driver and its private helpers
def _build_model(model_name):
    """Return a freshly initialised 10-class model placed on the global ``device``."""
    if model_name == 'resnet':
        # The deprecated ``pretrained`` flag is omitted; random initialisation
        # is already the default.
        model = resnet18(num_classes=10)
    else:
        model = CustomCNN()
    return model.to(device)


def _build_optimizer(opt_name, parameters):
    """Create the optimizer registered under ``opt_name`` for ``parameters``."""
    if opt_name == 'sgd':
        return optim.SGD(parameters, lr=0.1, momentum=0.9, weight_decay=5e-4)
    if opt_name == 'adam':
        return optim.Adam(parameters, lr=0.001, weight_decay=5e-4)
    if opt_name == 'rmsprop':
        return optim.RMSprop(parameters, lr=0.001, momentum=0.9, weight_decay=5e-4)
    raise ValueError(f"Unknown optimizer name: {opt_name!r}")


def _build_scheduler(sch_name, optimizer):
    """Create the learning-rate scheduler registered under ``sch_name``."""
    if sch_name == 'step':
        return optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
    if sch_name == 'cosine':
        return optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
    if sch_name == 'plateau':
        return optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)
    raise ValueError(f"Unknown scheduler name: {sch_name!r}")


def _train_one_epoch(model, optimizer, criterion):
    """Run one pass over ``trainloader``; return (mean loss, accuracy in %)."""
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    for inputs, targets in trainloader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        batch_loss = criterion(outputs, targets)
        batch_loss.backward()
        optimizer.step()

        # Weight by batch size so the epoch mean is per-sample even when the
        # last batch is smaller.
        running_loss += batch_loss.item() * targets.size(0)
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
    return running_loss / total, 100. * correct / total


def _evaluate(model):
    """Evaluate on ``testloader``; return (accuracy in %, elapsed seconds)."""
    model.eval()
    correct, total = 0, 0
    start_test_time = time.time()
    with torch.no_grad():
        for inputs, targets in testloader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    test_time = time.time() - start_test_time
    return 100. * correct / total, test_time


def _measure_complexity(model):
    """Return ptflops' (macs, params) for a single 3x32x32 input."""
    def _measure():
        return get_model_complexity_info(model, (3, 32, 32), as_strings=False, print_per_layer_stat=False)

    # Only enter a CUDA device context when actually running on CUDA;
    # entering it unconditionally fails on CPU-only machines.
    if device.type == 'cuda':
        with torch.cuda.device(device):
            return _measure()
    return _measure()


def run_experiments(model_name='custom', log_dir='logs'):
    """Train and evaluate one model per (optimizer, scheduler) combination.

    For every pair drawn from the module-level ``optimizers`` and
    ``schedulers`` lists, a fresh model is trained for ``epochs`` epochs on
    ``trainloader`` and evaluated once on ``testloader``. For each pair the
    following files are written into ``log_dir``:

    * ``temp_<opt>_<sch>.pth``  - the trained ``state_dict``
    * ``log_<opt>_<sch>.txt``   - accuracy, timing and model-size summary
    * ``curve_<opt>_<sch>.png`` - training loss and accuracy per epoch

    Args:
        model_name: ``'resnet'`` selects torchvision's ``resnet18``; any other
            value selects ``CustomCNN``.
        log_dir: Output directory; created if it does not exist.

    Returns:
        A list of ``(opt_name, sch_name, final_train_acc, test_acc)`` tuples,
        one per combination, with accuracies in percent.

    Raises:
        ValueError: If an optimizer or scheduler name is not recognised.
    """
    os.makedirs(log_dir, exist_ok=True)
    results = []
    for opt_name in optimizers:
        for sch_name in schedulers:
            print(f"\n==== Running Experiment: Optimizer={opt_name}, Scheduler={sch_name} ====")

            model = _build_model(model_name)
            optimizer = _build_optimizer(opt_name, model.parameters())
            scheduler = _build_scheduler(sch_name, optimizer)

            criterion = nn.CrossEntropyLoss()
            train_losses, train_accs = [], []

            # Training
            start_train_time = time.time()
            for epoch in range(epochs):
                epoch_loss, epoch_acc = _train_one_epoch(model, optimizer, criterion)
                train_losses.append(epoch_loss)
                train_accs.append(epoch_acc)
                print(f"Epoch {epoch+1}/{epochs} | Loss: {epoch_loss:.4f} | Acc: {epoch_acc:.2f}%")

                # ReduceLROnPlateau needs the monitored metric; the other
                # schedulers step unconditionally.
                if sch_name == 'plateau':
                    scheduler.step(epoch_loss)
                else:
                    scheduler.step()

            train_time = time.time() - start_train_time

            # Testing
            test_acc, test_time = _evaluate(model)

            # Complexity and on-disk size
            macs, params = _measure_complexity(model)
            weights_path = os.path.join(log_dir, f"temp_{opt_name}_{sch_name}.pth")
            torch.save(model.state_dict(), weights_path)
            model_size = os.path.getsize(weights_path) / 1024 / 1024

            # Write the summary log
            # NOTE(review): ptflops documents its first return value as MACs,
            # so the "FLOPs"/"MFLOPs" label below is probably mislabelled. It
            # is kept unchanged so existing log consumers keep working.
            log_path = os.path.join(log_dir, f"log_{opt_name}_{sch_name}.txt")
            with open(log_path, 'w') as f:
                f.write(f"Optimizer: {opt_name}\n")
                f.write(f"Scheduler: {sch_name}\n")
                f.write(f"Final Train Acc: {train_accs[-1]:.2f}%\n")
                f.write(f"Test Acc: {test_acc:.2f}%\n")
                f.write(f"Train Time: {train_time:.2f}s\n")
                f.write(f"Test Time: {test_time:.2f}s\n")
                f.write(f"FLOPs: {macs / 1e6:.2f} MFLOPs\n")
                f.write(f"Params: {params / 1e6:.2f} M\n")
                f.write(f"Model Size: {model_size:.2f} MB\n")

            # Plot the training curves
            plt.figure()
            plt.plot(range(1, epochs+1), train_losses, label='Loss')
            plt.plot(range(1, epochs+1), train_accs, label='Accuracy')
            plt.title(f"{opt_name.upper()} + {sch_name}")
            plt.xlabel("Epoch")
            plt.legend()
            plt.savefig(os.path.join(log_dir, f"curve_{opt_name}_{sch_name}.png"))
            plt.close()

            results.append((opt_name, sch_name, train_accs[-1], test_acc))

    return results

# Run every optimizer/scheduler experiment with the 'resnet' model
run_experiments(model_name='resnet', log_dir='logs')

Files already downloaded and verified
Files already downloaded and verified

==== Running Experiment: Optimizer=sgd, Scheduler=step ====
Epoch 1/50 | Loss: 2.1241 | Acc: 22.41%
Epoch 2/50 | Loss: 1.8484 | Acc: 31.21%
Epoch 3/50 | Loss: 1.7943 | Acc: 33.71%
Epoch 4/50 | Loss: 1.7483 | Acc: 35.75%
Epoch 5/50 | Loss: 1.7231 | Acc: 37.09%
Epoch 6/50 | Loss: 1.6961 | Acc: 38.77%
Epoch 7/50 | Loss: 1.6935 | Acc: 39.19%
Epoch 8/50 | Loss: 1.6973 | Acc: 39.01%
Epoch 9/50 | Loss: 1.7055 | Acc: 39.01%
Epoch 10/50 | Loss: 1.7007 | Acc: 38.63%
Epoch 11/50 | Loss: 1.7005 | Acc: 39.22%
Epoch 12/50 | Loss: 1.7065 | Acc: 38.62%
Epoch 13/50 | Loss: 1.6904 | Acc: 39.51%
Epoch 14/50 | Loss: 1.6964 | Acc: 39.18%
Epoch 15/50 | Loss: 1.7104 | Acc: 38.22%
Epoch 16/50 | Loss: 1.7108 | Acc: 38.46%
Epoch 17/50 | Loss: 1.7424 | Acc: 37.01%
Epoch 18/50 | Loss: 1.7222 | Acc: 37.50%
Epoch 19/50 | Loss: 1.7597 | Acc: 36.88%
Epoch 20/50 | Loss: 1.7527 | Acc: 37.01%
Epoch 21/50 | Loss: 1.7537 | Acc: 36.86%
Epoch 22/50

[('sgd', 'step', 68.62, 71.53),
 ('sgd', 'cosine', 83.464, 82.89),
 ('sgd', 'plateau', 61.36, 65.79),
 ('adam', 'step', 83.492, 83.53),
 ('adam', 'cosine', 87.382, 84.69),
 ('adam', 'plateau', 75.646, 77.82),
 ('rmsprop', 'step', 65.968, 69.29),
 ('rmsprop', 'cosine', 61.88, 65.91),
 ('rmsprop', 'plateau', 49.512, 56.01)]