In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# 定义 AlexNet
class AlexNet(nn.Module):
    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

# 数据预处理
transform = transforms.Compose([
    transforms.Resize((227, 227)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# 使用 CIFAR-10 数据集作为例子
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# 初始化模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AlexNet(num_classes=10).to(device)

# 损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练
for epoch in range(5):  # 可调整 epochs
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # 前向传播
        outputs = model(images)
        loss = criterion(outputs, labels)

        # 反向传播和优化
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/5], Loss: {running_loss/len(train_loader):.4f}")

# 测试
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy: {100 * correct / total:.2f}%")


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 48322309.12it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Epoch [1/5], Loss: 1.7171
Epoch [2/5], Loss: 1.3820
Epoch [3/5], Loss: 1.2384
Epoch [4/5], Loss: 1.1311
Epoch [5/5], Loss: 1.0511
Accuracy: 65.49%


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import time
from tqdm import tqdm

# 配置设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 数据预处理和增强
transform_train = transforms.Compose([
    transforms.Resize((227, 227)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])  # CIFAR-10标准化参数
])

transform_test = transforms.Compose([
    transforms.Resize((227, 227)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])
])

# 加载数据集
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False, num_workers=4, pin_memory=True)

class ModernAlexNet(nn.Module):
    def __init__(self, num_classes=10, dropout_rate=0.5):
        super(ModernAlexNet, self).__init__()
        self.features = nn.Sequential(
            # 第一个卷积块
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Dropout2d(0.25),
            
            # 第二个卷积块
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.BatchNorm2d(192),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Dropout2d(0.25),
            
            # 第三个卷积块
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.BatchNorm2d(384),
            nn.ReLU(inplace=True),
            
            # 第四个卷积块
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            
            # 第五个卷积块
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Dropout2d(0.25)
        )
        
        self.classifier = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(4096, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, num_classes)
        )
        
        self._initialize_weights()
    
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

class AverageMeter:
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def train_one_epoch(model, train_loader, criterion, optimizer, device, scheduler=None):
    model.train()
    losses = AverageMeter()
    top1 = AverageMeter()
    
    pbar = tqdm(train_loader, desc='Training')
    for batch_idx, (images, labels) in enumerate(pbar):
        images, labels = images.to(device), labels.to(device)
        
        # 混合精度训练
        with torch.cuda.amp.autocast(enabled=True):
            outputs = model(images)
            loss = criterion(outputs, labels)
        
        # 计算准确率
        _, predicted = outputs.max(1)
        correct = predicted.eq(labels).sum().item()
        acc = 100. * correct / labels.size(0)
        
        losses.update(loss.item(), images.size(0))
        top1.update(acc, images.size(0))
        
        # 优化器步骤
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        
        if scheduler is not None:
            scheduler.step()
        
        # 更新进度条
        pbar.set_postfix({
            'loss': f'{losses.avg:.4f}',
            'acc': f'{top1.avg:.2f}%'
        })
    
    return losses.avg, top1.avg

def evaluate(model, test_loader, criterion, device):
    model.eval()
    losses = AverageMeter()
    top1 = AverageMeter()
    
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            _, predicted = outputs.max(1)
            correct = predicted.eq(labels).sum().item()
            acc = 100. * correct / labels.size(0)
            
            losses.update(loss.item(), images.size(0))
            top1.update(acc, images.size(0))
    
    return losses.avg, top1.avg

def main():
    # 初始化模型
    model = ModernAlexNet().to(device)
    criterion = nn.CrossEntropyLoss()
    
    # 使用AdamW优化器
    optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)
    
    # 使用 OneCycleLR 调度器
    scheduler = optim.lr_scheduler.OneCycleLR(
        optimizer, max_lr=0.001,
        epochs=50, steps_per_epoch=len(train_loader)
    )
    
    # 训练配置
    n_epochs = 50
    best_acc = 0
    
    # 训练循环
    for epoch in range(n_epochs):
        start_time = time.time()
        
        # 训练阶段
        train_loss, train_acc = train_one_epoch(
            model, train_loader, criterion, optimizer, device, scheduler
        )
        
        # 评估阶段
        test_loss, test_acc = evaluate(model, test_loader, criterion, device)
        
        # 保存最佳模型
        if test_acc > best_acc:
            best_acc = test_acc
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
                'best_acc': best_acc,
            }, 'best_alexnet_cifar10.pth')
        
        # 打印训练信息
        epoch_time = time.time() - start_time
        print(f"\nEpoch [{epoch+1}/{n_epochs}]")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%")
        print(f"Time: {epoch_time:.2f}s")
        print("-" * 60)
    
    print(f"Best Test Accuracy: {best_acc:.2f}%")

if __name__ == "__main__":
    main()

Using device: cuda
Files already downloaded and verified
Files already downloaded and verified


  with torch.cuda.amp.autocast(enabled=True):
Training: 100%|██████████| 391/391 [01:37<00:00,  4.00it/s, loss=1.9172, acc=28.67%]



Epoch [1/50]
Train Loss: 1.9172, Train Acc: 28.67%
Test Loss: 1.5585, Test Acc: 42.27%
Time: 110.40s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:43<00:00,  3.79it/s, loss=1.6336, acc=39.73%]



Epoch [2/50]
Train Loss: 1.6336, Train Acc: 39.73%
Test Loss: 1.4163, Test Acc: 47.76%
Time: 116.41s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:43<00:00,  3.79it/s, loss=1.5121, acc=44.75%]



Epoch [3/50]
Train Loss: 1.5121, Train Acc: 44.75%
Test Loss: 1.2909, Test Acc: 52.63%
Time: 116.08s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:43<00:00,  3.79it/s, loss=1.4106, acc=48.87%]



Epoch [4/50]
Train Loss: 1.4106, Train Acc: 48.87%
Test Loss: 1.1730, Test Acc: 57.31%
Time: 116.80s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:43<00:00,  3.78it/s, loss=1.3108, acc=52.68%]



Epoch [5/50]
Train Loss: 1.3108, Train Acc: 52.68%
Test Loss: 1.0081, Test Acc: 64.39%
Time: 116.19s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:43<00:00,  3.77it/s, loss=1.2207, acc=56.07%]



Epoch [6/50]
Train Loss: 1.2207, Train Acc: 56.07%
Test Loss: 0.9736, Test Acc: 65.19%
Time: 116.70s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:43<00:00,  3.78it/s, loss=1.1599, acc=59.01%]



Epoch [7/50]
Train Loss: 1.1599, Train Acc: 59.01%
Test Loss: 0.9082, Test Acc: 67.95%
Time: 116.86s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:43<00:00,  3.78it/s, loss=1.1090, acc=61.01%]



Epoch [8/50]
Train Loss: 1.1090, Train Acc: 61.01%
Test Loss: 0.8732, Test Acc: 68.50%
Time: 116.63s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:43<00:00,  3.79it/s, loss=1.0669, acc=62.56%]



Epoch [9/50]
Train Loss: 1.0669, Train Acc: 62.56%
Test Loss: 0.9112, Test Acc: 67.96%
Time: 115.13s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:42<00:00,  3.80it/s, loss=1.0473, acc=63.50%]



Epoch [10/50]
Train Loss: 1.0473, Train Acc: 63.50%
Test Loss: 0.7877, Test Acc: 72.74%
Time: 115.97s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:44<00:00,  3.74it/s, loss=1.0115, acc=64.99%]



Epoch [11/50]
Train Loss: 1.0115, Train Acc: 64.99%
Test Loss: 0.7600, Test Acc: 73.97%
Time: 117.56s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:43<00:00,  3.77it/s, loss=0.9727, acc=66.13%]



Epoch [12/50]
Train Loss: 0.9727, Train Acc: 66.13%
Test Loss: 0.7573, Test Acc: 73.44%
Time: 115.64s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:44<00:00,  3.76it/s, loss=0.9470, acc=67.65%]



Epoch [13/50]
Train Loss: 0.9470, Train Acc: 67.65%
Test Loss: 0.7011, Test Acc: 75.97%
Time: 116.30s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:42<00:00,  3.81it/s, loss=0.9154, acc=68.36%]



Epoch [14/50]
Train Loss: 0.9154, Train Acc: 68.36%
Test Loss: 0.7921, Test Acc: 71.58%
Time: 114.53s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:43<00:00,  3.79it/s, loss=0.8848, acc=69.46%]



Epoch [15/50]
Train Loss: 0.8848, Train Acc: 69.46%
Test Loss: 0.6229, Test Acc: 78.31%
Time: 116.21s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:42<00:00,  3.80it/s, loss=0.8481, acc=70.79%]



Epoch [16/50]
Train Loss: 0.8481, Train Acc: 70.79%
Test Loss: 0.6157, Test Acc: 78.82%
Time: 115.11s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:39<00:00,  3.93it/s, loss=0.8282, acc=71.62%]



Epoch [17/50]
Train Loss: 0.8282, Train Acc: 71.62%
Test Loss: 0.6206, Test Acc: 79.23%
Time: 112.11s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:39<00:00,  3.94it/s, loss=0.7935, acc=72.65%]



Epoch [18/50]
Train Loss: 0.7935, Train Acc: 72.65%
Test Loss: 0.5786, Test Acc: 80.30%
Time: 111.85s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:38<00:00,  3.96it/s, loss=0.7739, acc=73.52%]



Epoch [19/50]
Train Loss: 0.7739, Train Acc: 73.52%
Test Loss: 0.5760, Test Acc: 80.02%
Time: 110.10s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:40<00:00,  3.87it/s, loss=0.7479, acc=74.62%]



Epoch [20/50]
Train Loss: 0.7479, Train Acc: 74.62%
Test Loss: 0.5571, Test Acc: 80.44%
Time: 113.28s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:41<00:00,  3.84it/s, loss=0.7259, acc=75.05%]



Epoch [21/50]
Train Loss: 0.7259, Train Acc: 75.05%
Test Loss: 0.5394, Test Acc: 81.34%
Time: 114.70s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:41<00:00,  3.85it/s, loss=0.7057, acc=75.70%]



Epoch [22/50]
Train Loss: 0.7057, Train Acc: 75.70%
Test Loss: 0.5376, Test Acc: 81.74%
Time: 114.29s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:40<00:00,  3.88it/s, loss=0.6821, acc=76.51%]



Epoch [23/50]
Train Loss: 0.6821, Train Acc: 76.51%
Test Loss: 0.5113, Test Acc: 82.56%
Time: 113.22s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:41<00:00,  3.84it/s, loss=0.6643, acc=77.15%]



Epoch [24/50]
Train Loss: 0.6643, Train Acc: 77.15%
Test Loss: 0.4920, Test Acc: 83.35%
Time: 114.58s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:40<00:00,  3.88it/s, loss=0.6445, acc=77.77%]



Epoch [25/50]
Train Loss: 0.6445, Train Acc: 77.77%
Test Loss: 0.4711, Test Acc: 84.07%
Time: 113.93s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:41<00:00,  3.85it/s, loss=0.6269, acc=78.60%]



Epoch [26/50]
Train Loss: 0.6269, Train Acc: 78.60%
Test Loss: 0.4466, Test Acc: 84.60%
Time: 114.05s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:47<00:00,  3.63it/s, loss=0.6058, acc=79.46%]



Epoch [27/50]
Train Loss: 0.6058, Train Acc: 79.46%
Test Loss: 0.4653, Test Acc: 84.12%
Time: 119.75s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:52<00:00,  3.47it/s, loss=0.5824, acc=80.08%]



Epoch [28/50]
Train Loss: 0.5824, Train Acc: 80.08%
Test Loss: 0.4456, Test Acc: 84.87%
Time: 126.59s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:48<00:00,  3.61it/s, loss=0.5670, acc=80.53%]



Epoch [29/50]
Train Loss: 0.5670, Train Acc: 80.53%
Test Loss: 0.4366, Test Acc: 85.00%
Time: 121.77s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:45<00:00,  3.70it/s, loss=0.5601, acc=80.74%]



Epoch [30/50]
Train Loss: 0.5601, Train Acc: 80.74%
Test Loss: 0.4235, Test Acc: 85.57%
Time: 118.61s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:44<00:00,  3.74it/s, loss=0.5383, acc=81.50%]



Epoch [31/50]
Train Loss: 0.5383, Train Acc: 81.50%
Test Loss: 0.4420, Test Acc: 85.04%
Time: 115.86s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:45<00:00,  3.71it/s, loss=0.5210, acc=82.09%]



Epoch [32/50]
Train Loss: 0.5210, Train Acc: 82.09%
Test Loss: 0.4020, Test Acc: 86.49%
Time: 119.15s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:45<00:00,  3.71it/s, loss=0.5118, acc=82.33%]



Epoch [33/50]
Train Loss: 0.5118, Train Acc: 82.33%
Test Loss: 0.3943, Test Acc: 86.70%
Time: 118.61s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:47<00:00,  3.65it/s, loss=0.4982, acc=83.00%]



Epoch [34/50]
Train Loss: 0.4982, Train Acc: 83.00%
Test Loss: 0.3871, Test Acc: 86.69%
Time: 118.18s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:45<00:00,  3.72it/s, loss=0.4815, acc=83.45%]



Epoch [35/50]
Train Loss: 0.4815, Train Acc: 83.45%
Test Loss: 0.3701, Test Acc: 87.46%
Time: 118.59s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:45<00:00,  3.72it/s, loss=0.4741, acc=83.88%]



Epoch [36/50]
Train Loss: 0.4741, Train Acc: 83.88%
Test Loss: 0.3664, Test Acc: 87.48%
Time: 117.95s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:43<00:00,  3.76it/s, loss=0.4627, acc=84.07%]



Epoch [37/50]
Train Loss: 0.4627, Train Acc: 84.07%
Test Loss: 0.3567, Test Acc: 87.44%
Time: 115.33s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:46<00:00,  3.67it/s, loss=0.4478, acc=84.60%]



Epoch [38/50]
Train Loss: 0.4478, Train Acc: 84.60%
Test Loss: 0.3694, Test Acc: 87.28%
Time: 118.20s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:52<00:00,  3.46it/s, loss=0.4389, acc=84.95%]



Epoch [39/50]
Train Loss: 0.4389, Train Acc: 84.95%
Test Loss: 0.3440, Test Acc: 88.42%
Time: 126.30s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:52<00:00,  3.48it/s, loss=0.4297, acc=85.06%]



Epoch [40/50]
Train Loss: 0.4297, Train Acc: 85.06%
Test Loss: 0.3468, Test Acc: 88.06%
Time: 124.48s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:52<00:00,  3.47it/s, loss=0.4200, acc=85.57%]



Epoch [41/50]
Train Loss: 0.4200, Train Acc: 85.57%
Test Loss: 0.3448, Test Acc: 88.28%
Time: 125.21s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:52<00:00,  3.47it/s, loss=0.4130, acc=85.56%]



Epoch [42/50]
Train Loss: 0.4130, Train Acc: 85.56%
Test Loss: 0.3454, Test Acc: 88.28%
Time: 124.51s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:54<00:00,  3.42it/s, loss=0.4083, acc=85.76%]



Epoch [43/50]
Train Loss: 0.4083, Train Acc: 85.76%
Test Loss: 0.3366, Test Acc: 88.52%
Time: 127.93s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:53<00:00,  3.44it/s, loss=0.3980, acc=86.32%]



Epoch [44/50]
Train Loss: 0.3980, Train Acc: 86.32%
Test Loss: 0.3382, Test Acc: 88.43%
Time: 125.97s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:53<00:00,  3.45it/s, loss=0.3944, acc=86.32%]



Epoch [45/50]
Train Loss: 0.3944, Train Acc: 86.32%
Test Loss: 0.3361, Test Acc: 88.63%
Time: 127.88s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:53<00:00,  3.43it/s, loss=0.3991, acc=86.40%]



Epoch [46/50]
Train Loss: 0.3991, Train Acc: 86.40%
Test Loss: 0.3373, Test Acc: 88.53%
Time: 125.97s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:52<00:00,  3.49it/s, loss=0.3918, acc=86.44%]



Epoch [47/50]
Train Loss: 0.3918, Train Acc: 86.44%
Test Loss: 0.3362, Test Acc: 88.45%
Time: 124.83s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:52<00:00,  3.46it/s, loss=0.3844, acc=86.53%]



Epoch [48/50]
Train Loss: 0.3844, Train Acc: 86.53%
Test Loss: 0.3346, Test Acc: 88.51%
Time: 124.88s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:53<00:00,  3.44it/s, loss=0.3904, acc=86.59%]



Epoch [49/50]
Train Loss: 0.3904, Train Acc: 86.59%
Test Loss: 0.3329, Test Acc: 88.56%
Time: 125.52s
------------------------------------------------------------


Training: 100%|██████████| 391/391 [01:53<00:00,  3.44it/s, loss=0.3873, acc=86.36%]



Epoch [50/50]
Train Loss: 0.3873, Train Acc: 86.36%
Test Loss: 0.3343, Test Acc: 88.57%
Time: 125.66s
------------------------------------------------------------
Best Test Accuracy: 88.63%


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import time
from tqdm import tqdm
import numpy as np

# 设置随机种子
def set_seed(seed=42):
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    torch.backends.cudnn.deterministic = True

# 配置设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 数据预处理和数据增强
transform_train = transforms.Compose([
    transforms.Resize(256),  # 先调整为较大尺寸
    transforms.RandomCrop(227),  # 随机裁剪到目标尺寸
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])
])

transform_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(227),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])
])

class ModernAlexNet(nn.Module):
    def __init__(self, num_classes=10, dropout_rate=0.5):
        super(ModernAlexNet, self).__init__()
        
        # 特征提取器
        self.features = nn.Sequential(
            # Conv1
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
            nn.BatchNorm2d(96),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Dropout2d(0.2),
            
            # Conv2
            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Dropout2d(0.2),
            
            # Conv3
            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.BatchNorm2d(384),
            nn.ReLU(inplace=True),
            nn.Dropout2d(0.2),
            
            # Conv4
            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.BatchNorm2d(384),
            nn.ReLU(inplace=True),
            nn.Dropout2d(0.2),
            
            # Conv5
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Dropout2d(0.2)
        )
        
        # 计算卷积层输出大小
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        
        # 分类器
        self.classifier = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(4096),
            nn.Dropout(dropout_rate),
            nn.Linear(4096, 1024),
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(1024),
            nn.Linear(1024, num_classes)
        )
        
        self._initialize_weights()
    
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

def train_epoch(model, train_loader, criterion, optimizer, device, scheduler=None):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    pbar = tqdm(train_loader, desc='Training')
    for inputs, labels in pbar:
        inputs, labels = inputs.to(device), labels.to(device)
        
        # 前向传播
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        # 反向传播和优化
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        
        if scheduler is not None:
            scheduler.step()
        
        # 统计
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        
        # 更新进度条
        pbar.set_postfix({
            'loss': f'{loss.item():.4f}',
            'acc': f'{100.*correct/total:.2f}%'
        })
    
    return running_loss / len(train_loader), 100. * correct / total

def evaluate(model, test_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    
    return running_loss / len(test_loader), 100. * correct / total

def main():
    # 设置随机种子
    set_seed(42)
    
    # 加载数据集
    train_dataset = datasets.CIFAR10(
        root='./data', train=True, transform=transform_train, download=True)
    test_dataset = datasets.CIFAR10(
        root='./data', train=False, transform=transform_test, download=True)
    
    train_loader = DataLoader(
        train_dataset, batch_size=64, shuffle=True,
        num_workers=2, pin_memory=True)
    test_loader = DataLoader(
        test_dataset, batch_size=128, shuffle=False,
        num_workers=2, pin_memory=True)
    
    # 初始化模型和训练组件
    model = ModernAlexNet(num_classes=10).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)
    
    # 学习率调度
    scheduler = optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=0.001,
        epochs=30,
        steps_per_epoch=len(train_loader),
        pct_start=0.1
    )
    
    # 训练循环
    best_acc = 0
    epochs = 30
    
    for epoch in range(epochs):
        print(f"\nEpoch {epoch+1}/{epochs}")
        start_time = time.time()
        
        # 训练
        train_loss, train_acc = train_epoch(
            model, train_loader, criterion, optimizer, device, scheduler)
        
        # 评估
        test_loss, test_acc = evaluate(model, test_loader, criterion, device)
        
        # 保存最佳模型
        if test_acc > best_acc:
            best_acc = test_acc
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'best_acc': best_acc,
            }, 'best_alexnet_cifar10.pth')
        
        # 打印训练信息
        epoch_time = time.time() - start_time
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%")
        print(f"Time: {epoch_time:.2f}s")
        print("-" * 60)
    
    print(f"Best Test Accuracy: {best_acc:.2f}%")

if __name__ == "__main__":
    main()

Using device: cuda
Files already downloaded and verified
Files already downloaded and verified

Epoch 1/30


Training: 100%|██████████| 782/782 [02:13<00:00,  5.84it/s, loss=2.1432, acc=32.58%]


Train Loss: 1.8393, Train Acc: 32.58%
Test Loss: 1.4404, Test Acc: 46.78%
Time: 147.43s
------------------------------------------------------------

Epoch 2/30


Training: 100%|██████████| 782/782 [02:06<00:00,  6.17it/s, loss=1.4615, acc=44.94%]


Train Loss: 1.5272, Train Acc: 44.94%
Test Loss: 1.2453, Test Acc: 54.33%
Time: 140.31s
------------------------------------------------------------

Epoch 3/30


Training: 100%|██████████| 782/782 [02:06<00:00,  6.17it/s, loss=1.0620, acc=51.71%]


Train Loss: 1.3503, Train Acc: 51.71%
Test Loss: 1.1067, Test Acc: 60.79%
Time: 140.54s
------------------------------------------------------------

Epoch 4/30


Training: 100%|██████████| 782/782 [02:05<00:00,  6.22it/s, loss=1.1442, acc=57.96%]


Train Loss: 1.1910, Train Acc: 57.96%
Test Loss: 0.9313, Test Acc: 67.15%
Time: 139.15s
------------------------------------------------------------

Epoch 5/30


Training: 100%|██████████| 782/782 [02:06<00:00,  6.19it/s, loss=1.0318, acc=61.73%]


Train Loss: 1.0856, Train Acc: 61.73%
Test Loss: 0.8249, Test Acc: 71.43%
Time: 139.79s
------------------------------------------------------------

Epoch 6/30


Training: 100%|██████████| 782/782 [02:06<00:00,  6.17it/s, loss=1.3154, acc=65.10%]


Train Loss: 0.9990, Train Acc: 65.10%
Test Loss: 0.7802, Test Acc: 72.56%
Time: 140.07s
------------------------------------------------------------

Epoch 7/30


Training: 100%|██████████| 782/782 [02:07<00:00,  6.13it/s, loss=0.9548, acc=67.21%]


Train Loss: 0.9391, Train Acc: 67.21%
Test Loss: 0.6995, Test Acc: 76.22%
Time: 141.29s
------------------------------------------------------------

Epoch 8/30


Training: 100%|██████████| 782/782 [02:11<00:00,  5.95it/s, loss=0.8413, acc=69.32%]


Train Loss: 0.8783, Train Acc: 69.32%
Test Loss: 0.6764, Test Acc: 76.46%
Time: 145.19s
------------------------------------------------------------

Epoch 9/30


Training: 100%|██████████| 782/782 [02:12<00:00,  5.91it/s, loss=0.6860, acc=70.87%]


Train Loss: 0.8348, Train Acc: 70.87%
Test Loss: 0.6408, Test Acc: 78.06%
Time: 146.16s
------------------------------------------------------------

Epoch 10/30


Training: 100%|██████████| 782/782 [02:12<00:00,  5.92it/s, loss=1.1482, acc=72.32%]


Train Loss: 0.7930, Train Acc: 72.32%
Test Loss: 0.5933, Test Acc: 80.60%
Time: 145.56s
------------------------------------------------------------

Epoch 11/30


Training: 100%|██████████| 782/782 [02:08<00:00,  6.07it/s, loss=0.6305, acc=73.90%]


Train Loss: 0.7484, Train Acc: 73.90%
Test Loss: 0.8408, Test Acc: 77.54%
Time: 141.33s
------------------------------------------------------------

Epoch 12/30


Training: 100%|██████████| 782/782 [02:08<00:00,  6.08it/s, loss=0.8238, acc=75.16%]


Train Loss: 0.7219, Train Acc: 75.16%
Test Loss: 0.5344, Test Acc: 81.91%
Time: 142.29s
------------------------------------------------------------

Epoch 13/30


Training: 100%|██████████| 782/782 [02:09<00:00,  6.05it/s, loss=0.3821, acc=75.97%]


Train Loss: 0.6902, Train Acc: 75.97%
Test Loss: 0.5628, Test Acc: 82.37%
Time: 142.63s
------------------------------------------------------------

Epoch 14/30


Training: 100%|██████████| 782/782 [02:09<00:00,  6.03it/s, loss=0.6333, acc=76.70%]


Train Loss: 0.6664, Train Acc: 76.70%
Test Loss: 0.5518, Test Acc: 83.52%
Time: 143.55s
------------------------------------------------------------

Epoch 15/30


Training: 100%|██████████| 782/782 [02:09<00:00,  6.04it/s, loss=0.6448, acc=77.92%]


Train Loss: 0.6367, Train Acc: 77.92%
Test Loss: 0.5151, Test Acc: 83.71%
Time: 142.94s
------------------------------------------------------------

Epoch 16/30


Training: 100%|██████████| 782/782 [02:10<00:00,  5.99it/s, loss=0.3184, acc=78.61%]


Train Loss: 0.6120, Train Acc: 78.61%
Test Loss: 0.4773, Test Acc: 84.22%
Time: 144.41s
------------------------------------------------------------

Epoch 17/30


Training: 100%|██████████| 782/782 [02:09<00:00,  6.05it/s, loss=0.2618, acc=79.56%]


Train Loss: 0.5908, Train Acc: 79.56%
Test Loss: 0.4569, Test Acc: 85.02%
Time: 142.77s
------------------------------------------------------------

Epoch 18/30


Training: 100%|██████████| 782/782 [02:09<00:00,  6.04it/s, loss=0.6724, acc=80.15%]


Train Loss: 0.5697, Train Acc: 80.15%
Test Loss: 0.4461, Test Acc: 85.18%
Time: 143.83s
------------------------------------------------------------

Epoch 19/30


Training: 100%|██████████| 782/782 [02:12<00:00,  5.90it/s, loss=0.8683, acc=80.76%]


Train Loss: 0.5535, Train Acc: 80.76%
Test Loss: 0.4269, Test Acc: 85.96%
Time: 145.98s
------------------------------------------------------------

Epoch 20/30


Training: 100%|██████████| 782/782 [02:11<00:00,  5.94it/s, loss=0.6293, acc=81.47%]


Train Loss: 0.5333, Train Acc: 81.47%
Test Loss: 0.4107, Test Acc: 86.04%
Time: 145.73s
------------------------------------------------------------

Epoch 21/30


Training: 100%|██████████| 782/782 [02:11<00:00,  5.92it/s, loss=0.6764, acc=81.97%]


Train Loss: 0.5211, Train Acc: 81.97%
Test Loss: 0.3957, Test Acc: 86.57%
Time: 145.56s
------------------------------------------------------------

Epoch 22/30


Training: 100%|██████████| 782/782 [02:13<00:00,  5.85it/s, loss=0.3866, acc=82.42%]


Train Loss: 0.5041, Train Acc: 82.42%
Test Loss: 0.3968, Test Acc: 86.88%
Time: 147.43s
------------------------------------------------------------

Epoch 23/30


Training: 100%|██████████| 782/782 [02:13<00:00,  5.88it/s, loss=0.9490, acc=82.82%]


Train Loss: 0.4927, Train Acc: 82.82%
Test Loss: 0.3974, Test Acc: 86.94%
Time: 147.12s
------------------------------------------------------------

Epoch 24/30


Training: 100%|██████████| 782/782 [02:13<00:00,  5.86it/s, loss=0.6365, acc=83.22%]


Train Loss: 0.4812, Train Acc: 83.22%
Test Loss: 0.3951, Test Acc: 87.12%
Time: 147.13s
------------------------------------------------------------

Epoch 25/30


Training: 100%|██████████| 782/782 [02:12<00:00,  5.92it/s, loss=0.6318, acc=83.39%]


Train Loss: 0.4793, Train Acc: 83.39%
Test Loss: 0.3867, Test Acc: 87.34%
Time: 146.17s
------------------------------------------------------------

Epoch 26/30


Training: 100%|██████████| 782/782 [02:11<00:00,  5.94it/s, loss=0.2813, acc=84.00%]


Train Loss: 0.4613, Train Acc: 84.00%
Test Loss: 0.3700, Test Acc: 87.40%
Time: 145.31s
------------------------------------------------------------

Epoch 27/30


Training: 100%|██████████| 782/782 [02:13<00:00,  5.88it/s, loss=0.6923, acc=84.11%]


Train Loss: 0.4592, Train Acc: 84.11%
Test Loss: 0.3812, Test Acc: 87.43%
Time: 146.93s
------------------------------------------------------------

Epoch 28/30


Training: 100%|██████████| 782/782 [02:12<00:00,  5.88it/s, loss=0.2357, acc=84.16%]


Train Loss: 0.4571, Train Acc: 84.16%
Test Loss: 0.3650, Test Acc: 87.66%
Time: 146.86s
------------------------------------------------------------

Epoch 29/30


Training: 100%|██████████| 782/782 [02:12<00:00,  5.89it/s, loss=0.7366, acc=84.33%]


Train Loss: 0.4515, Train Acc: 84.33%
Test Loss: 0.3650, Test Acc: 87.70%
Time: 146.32s
------------------------------------------------------------

Epoch 30/30


Training: 100%|██████████| 782/782 [02:12<00:00,  5.91it/s, loss=0.3171, acc=84.22%]


Train Loss: 0.4545, Train Acc: 84.22%
Test Loss: 0.3672, Test Acc: 87.61%
Time: 144.93s
------------------------------------------------------------
Best Test Accuracy: 87.70%


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
import time
from tqdm import tqdm

# 设置设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 数据预处理和增强
transform_train = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(227),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])
])

transform_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(227),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])
])

# 加载数据集
train_dataset = datasets.CIFAR10(root='./data', train=True, transform=transform_train, download=True)
test_dataset = datasets.CIFAR10(root='./data', train=False, transform=transform_test, download=True)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

# 加载预训练的 ResNet50
model = models.resnet50(pretrained=True)

# 修改最后一层以适应 CIFAR-10（10 类）
model.fc = nn.Linear(model.fc.in_features, 10)

# 冻结所有层，除了最后一层
for param in model.parameters():
    param.requires_grad = False

for param in model.fc.parameters():
    param.requires_grad = True

model.to(device)

# 设置优化器和损失函数
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

# 训练过程
epochs = 30
best_acc = 0

for epoch in range(epochs):
    print(f"Epoch {epoch+1}/{epochs}")
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    pbar = tqdm(train_loader, desc="Training")
    for inputs, labels in pbar:
        inputs, labels = inputs.to(device), labels.to(device)

        # 前向传播
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # 反向传播
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # 统计
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

        pbar.set_postfix({'loss': f'{loss.item():.4f}', 'acc': f'{100.*correct/total:.2f}%'})

    print(f"Train Loss: {running_loss/len(train_loader):.4f}, Train Acc: {100.*correct/total:.2f}%")

    # 评估
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    print(f"Test Loss: {test_loss/len(test_loader):.4f}, Test Acc: {100.*correct/total:.2f}%")

    # 保存最佳模型
    if correct / total > best_acc:
        best_acc = correct / total
        torch.save(model.state_dict(), 'best_model.pth')

print(f"Best Test Accuracy: {best_acc:.2f}%")


Using device: cuda
Files already downloaded and verified
Files already downloaded and verified


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 179MB/s]


Epoch 1/30


Training: 100%|██████████| 782/782 [04:37<00:00,  2.82it/s, loss=0.8799, acc=70.94%]


Train Loss: 0.8913, Train Acc: 70.94%
Test Loss: 0.7026, Test Acc: 75.88%
Epoch 2/30


Training: 100%|██████████| 782/782 [04:30<00:00,  2.89it/s, loss=0.4403, acc=76.04%]


Train Loss: 0.6945, Train Acc: 76.04%
Test Loss: 0.6531, Test Acc: 77.25%
Epoch 3/30


Training: 100%|██████████| 782/782 [04:29<00:00,  2.90it/s, loss=0.6010, acc=76.86%]


Train Loss: 0.6701, Train Acc: 76.86%
Test Loss: 0.6895, Test Acc: 76.23%
Epoch 4/30


Training: 100%|██████████| 782/782 [04:29<00:00,  2.91it/s, loss=0.7496, acc=77.46%]


Train Loss: 0.6474, Train Acc: 77.46%
Test Loss: 0.5959, Test Acc: 79.31%
Epoch 5/30


Training: 100%|██████████| 782/782 [04:29<00:00,  2.90it/s, loss=0.3768, acc=78.39%]


Train Loss: 0.6303, Train Acc: 78.39%
Test Loss: 0.6712, Test Acc: 76.39%
Epoch 6/30


Training: 100%|██████████| 782/782 [04:29<00:00,  2.90it/s, loss=0.4954, acc=78.62%]


Train Loss: 0.6237, Train Acc: 78.62%
Test Loss: 0.6004, Test Acc: 79.11%
Epoch 7/30


Training: 100%|██████████| 782/782 [04:30<00:00,  2.89it/s, loss=1.2169, acc=78.65%]


Train Loss: 0.6155, Train Acc: 78.65%
Test Loss: 0.5957, Test Acc: 79.49%
Epoch 8/30


Training: 100%|██████████| 782/782 [04:34<00:00,  2.85it/s, loss=0.3846, acc=78.92%]


Train Loss: 0.6133, Train Acc: 78.92%
Test Loss: 0.6548, Test Acc: 77.59%
Epoch 9/30


Training: 100%|██████████| 782/782 [04:36<00:00,  2.83it/s, loss=1.1036, acc=79.34%]


Train Loss: 0.6008, Train Acc: 79.34%
Test Loss: 0.5906, Test Acc: 79.48%
Epoch 10/30


Training: 100%|██████████| 782/782 [04:36<00:00,  2.83it/s, loss=0.7555, acc=79.41%]


Train Loss: 0.5962, Train Acc: 79.41%
Test Loss: 0.5896, Test Acc: 79.38%
Epoch 11/30


Training: 100%|██████████| 782/782 [04:36<00:00,  2.82it/s, loss=0.7146, acc=79.96%]


Train Loss: 0.5843, Train Acc: 79.96%
Test Loss: 0.5897, Test Acc: 79.51%
Epoch 12/30


Training: 100%|██████████| 782/782 [04:36<00:00,  2.83it/s, loss=0.2937, acc=79.94%]


Train Loss: 0.5809, Train Acc: 79.94%
Test Loss: 0.5582, Test Acc: 80.66%
Epoch 13/30


Training: 100%|██████████| 782/782 [04:37<00:00,  2.82it/s, loss=0.5629, acc=80.33%]


Train Loss: 0.5761, Train Acc: 80.33%
Test Loss: 0.5852, Test Acc: 79.46%
Epoch 14/30


Training: 100%|██████████| 782/782 [04:37<00:00,  2.81it/s, loss=0.3959, acc=80.14%]


Train Loss: 0.5742, Train Acc: 80.14%
Test Loss: 0.5714, Test Acc: 79.97%
Epoch 15/30


Training: 100%|██████████| 782/782 [04:37<00:00,  2.82it/s, loss=0.4911, acc=80.33%]


Train Loss: 0.5715, Train Acc: 80.33%
Test Loss: 0.5760, Test Acc: 80.22%
Epoch 16/30


Training: 100%|██████████| 782/782 [04:37<00:00,  2.82it/s, loss=0.3962, acc=80.37%]


Train Loss: 0.5689, Train Acc: 80.37%
Test Loss: 0.5700, Test Acc: 80.35%
Epoch 17/30


Training: 100%|██████████| 782/782 [04:35<00:00,  2.84it/s, loss=0.6726, acc=80.65%]


Train Loss: 0.5645, Train Acc: 80.65%
Test Loss: 0.5473, Test Acc: 80.92%
Epoch 18/30


Training: 100%|██████████| 782/782 [04:35<00:00,  2.84it/s, loss=0.8292, acc=80.65%]


Train Loss: 0.5621, Train Acc: 80.65%
Test Loss: 0.5789, Test Acc: 80.24%
Epoch 19/30


Training: 100%|██████████| 782/782 [04:36<00:00,  2.82it/s, loss=0.1951, acc=80.99%]


Train Loss: 0.5550, Train Acc: 80.99%
Test Loss: 0.5691, Test Acc: 80.40%
Epoch 20/30


Training: 100%|██████████| 782/782 [04:35<00:00,  2.84it/s, loss=1.1810, acc=80.76%]


Train Loss: 0.5574, Train Acc: 80.76%
Test Loss: 0.5499, Test Acc: 81.11%
Epoch 21/30


Training: 100%|██████████| 782/782 [04:34<00:00,  2.84it/s, loss=0.6091, acc=81.10%]


Train Loss: 0.5482, Train Acc: 81.10%
Test Loss: 0.5395, Test Acc: 81.32%
Epoch 22/30


Training: 100%|██████████| 782/782 [04:35<00:00,  2.84it/s, loss=0.8617, acc=81.32%]


Train Loss: 0.5420, Train Acc: 81.32%
Test Loss: 0.5626, Test Acc: 80.39%
Epoch 23/30


Training: 100%|██████████| 782/782 [04:35<00:00,  2.84it/s, loss=0.5601, acc=81.23%]


Train Loss: 0.5450, Train Acc: 81.23%
Test Loss: 0.5424, Test Acc: 81.08%
Epoch 24/30


Training: 100%|██████████| 782/782 [04:35<00:00,  2.84it/s, loss=0.5113, acc=81.26%]


Train Loss: 0.5407, Train Acc: 81.26%
Test Loss: 0.5517, Test Acc: 81.11%
Epoch 25/30


Training: 100%|██████████| 782/782 [04:35<00:00,  2.84it/s, loss=0.9318, acc=81.44%]


Train Loss: 0.5381, Train Acc: 81.44%
Test Loss: 0.5418, Test Acc: 81.36%
Epoch 26/30


Training: 100%|██████████| 782/782 [04:35<00:00,  2.83it/s, loss=0.3615, acc=81.46%]


Train Loss: 0.5375, Train Acc: 81.46%
Test Loss: 0.5250, Test Acc: 82.06%
Epoch 27/30


Training: 100%|██████████| 782/782 [04:34<00:00,  2.85it/s, loss=0.6042, acc=81.25%]


Train Loss: 0.5370, Train Acc: 81.25%
Test Loss: 0.5057, Test Acc: 82.26%
Epoch 28/30


Training: 100%|██████████| 782/782 [04:36<00:00,  2.83it/s, loss=0.7416, acc=81.37%]


Train Loss: 0.5401, Train Acc: 81.37%
Test Loss: 0.5425, Test Acc: 81.40%
Epoch 29/30


Training: 100%|██████████| 782/782 [04:35<00:00,  2.84it/s, loss=0.7423, acc=81.63%]


Train Loss: 0.5336, Train Acc: 81.63%
Test Loss: 0.5687, Test Acc: 80.94%
Epoch 30/30


Training: 100%|██████████| 782/782 [04:35<00:00,  2.84it/s, loss=0.8613, acc=81.50%]


Train Loss: 0.5336, Train Acc: 81.50%
Test Loss: 0.5209, Test Acc: 81.65%
Best Test Accuracy: 0.82%


# vit_base_patch16_224',

In [5]:
pip install timm torch torchvision


Note: you may need to restart the kernel to use updated packages.


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import time
from tqdm import tqdm
import timm
import numpy as np

# 设置随机种子
def set_seed(seed=42):
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    torch.backends.cudnn.deterministic = True

# 配置设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 数据预处理和数据增强
transform_train = transforms.Compose([
    transforms.Resize(256),  # 先调整为较大尺寸
    transforms.RandomCrop(224),  # 随机裁剪到目标尺寸
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])
])

transform_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])
])

# 加载 CIFAR-10 数据集
train_dataset = datasets.CIFAR10(
    root='./data', train=True, transform=transform_train, download=True)
test_dataset = datasets.CIFAR10(
    root='./data', train=False, transform=transform_test, download=True)

train_loader = DataLoader(
    train_dataset, batch_size=64, shuffle=True,
    num_workers=2, pin_memory=True)
test_loader = DataLoader(
    test_dataset, batch_size=128, shuffle=False,
    num_workers=2, pin_memory=True)

# 使用 ViT 模型（如果 CaiT 模型不可用）
model = timm.create_model('vit_base_patch16_224', pretrained=True)
model.head = nn.Linear(model.head.in_features, 10)  # 修改输出层，适应 CIFAR-10 数据集（10 类）

model.to(device)

# 损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)

# 学习率调度器
scheduler = optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=0.001,
    epochs=30,
    steps_per_epoch=len(train_loader),
    pct_start=0.1
)

# 设置随机种子
set_seed(42)

# 训练和评估函数
def train_epoch(model, train_loader, criterion, optimizer, device, scheduler=None):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    pbar = tqdm(train_loader, desc='Training')
    for inputs, labels in pbar:
        inputs, labels = inputs.to(device), labels.to(device)
        
        # 前向传播
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        # 反向传播和优化
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        
        if scheduler is not None:
            scheduler.step()
        
        # 统计
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        
        # 更新进度条
        pbar.set_postfix({
            'loss': f'{loss.item():.4f}',
            'acc': f'{100.*correct/total:.2f}%'
        })
    
    return running_loss / len(train_loader), 100. * correct / total

def evaluate(model, test_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    
    return running_loss / len(test_loader), 100. * correct / total

# 训练过程
def main():
    best_acc = 0
    epochs = 3
    
    for epoch in range(epochs):
        print(f"\nEpoch {epoch+1}/{epochs}")
        start_time = time.time()
        
        # 训练
        train_loss, train_acc = train_epoch(
            model, train_loader, criterion, optimizer, device, scheduler)
        
        # 评估
        test_loss, test_acc = evaluate(model, test_loader, criterion, device)
        
        # 保存最佳模型
        if test_acc > best_acc:
            best_acc = test_acc
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'best_acc': best_acc,
            }, 'best_cait_model.pth')
        
        # 打印训练信息
        epoch_time = time.time() - start_time
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%")
        print(f"Time: {epoch_time:.2f}s")
        print("-" * 60)
    
    print(f"Best Test Accuracy: {best_acc:.2f}%")

if __name__ == "__main__":
    main()


Using device: cuda
Files already downloaded and verified
Files already downloaded and verified


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]


Epoch 1/3


Training: 100%|██████████| 782/782 [28:10<00:00,  2.16s/it, loss=0.2711, acc=89.91%]


Train Loss: 0.3131, Train Acc: 89.91%
Test Loss: 0.4209, Test Acc: 85.81%
Time: 1800.78s
------------------------------------------------------------

Epoch 2/3


Training: 100%|██████████| 782/782 [28:10<00:00,  2.16s/it, loss=1.9200, acc=70.94%]


Train Loss: 0.8530, Train Acc: 70.94%
Test Loss: 1.5533, Test Acc: 45.63%
Time: 1798.61s
------------------------------------------------------------

Epoch 3/3


Training: 100%|██████████| 782/782 [28:06<00:00,  2.16s/it, loss=1.6653, acc=43.43%]


Train Loss: 1.5640, Train Acc: 43.43%
Test Loss: 1.6118, Test Acc: 39.96%
Time: 1794.38s
------------------------------------------------------------
Best Test Accuracy: 85.81%


# The end