In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import matplotlib.pyplot as plt

In [4]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [5]:
# Data preprocessing: resize every image to 224x224, convert it to a tensor,
# then normalise each of the three channels with mean 0.5 and std 0.5.
data_transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

In [6]:
# Load the data: CIFAR-10 train and test splits (downloaded into ./cifar_10 if
# missing), both run through the shared `data_transform` pipeline.
# Only the training loader shuffles; the test loader keeps dataset order.
train_sets = datasets.CIFAR10(
    root='cifar_10',
    train=True,
    download=True,
    transform=data_transform,
)
train_loader = DataLoader(
    dataset=train_sets,
    batch_size=64,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)
test_sets = datasets.CIFAR10(
    root='cifar_10',
    train=False,
    download=True,
    transform=data_transform,
)
test_loader = DataLoader(
    dataset=test_sets,
    batch_size=64,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)

In [7]:
class Inception(nn.Module):
    """Inception block: four parallel branches concatenated along the channel axis.

    Branches, in concatenation order:
      1. 1x1 convolution                      -> ``ch1x1`` channels
      2. 1x1 reduction, then 3x3 convolution  -> ``ch3x3`` channels
      3. 1x1 reduction, then 5x5 convolution  -> ``ch5x5`` channels
      4. 3x3 max-pool, then 1x1 projection    -> ``pool_proj`` channels

    Every convolution is followed by an in-place ReLU. The paddings and the
    stride-1 pooling keep the spatial size unchanged, so the output has
    ``ch1x1 + ch3x3 + ch5x5 + pool_proj`` channels at the input resolution.

    Args:
        in_channels: Number of channels of the input feature map.
        ch1x1: Output channels of the 1x1 branch.
        ch3x3red: Channels after the 1x1 reduction in front of the 3x3 conv.
        ch3x3: Output channels of the 3x3 branch.
        ch5x5red: Channels after the 1x1 reduction in front of the 5x5 conv.
        ch5x5: Output channels of the 5x5 branch.
        pool_proj: Output channels of the 1x1 projection after max-pooling.
    """

    @staticmethod
    def _conv_relu(in_ch, out_ch, kernel_size, padding=0):
        """Return a ``[Conv2d, ReLU(inplace=True)]`` pair as a list of layers."""
        return [
            nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=padding),
            nn.ReLU(inplace=True),
        ]

    def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj):
        super().__init__()
        unit = self._conv_relu

        # Layers are created in the same order and at the same Sequential
        # indices as before, so state_dict keys and init order are unchanged.
        self.branch1 = nn.Sequential(*unit(in_channels, ch1x1, 1))
        self.branch2 = nn.Sequential(
            *unit(in_channels, ch3x3red, 1),
            *unit(ch3x3red, ch3x3, 3, padding=1),
        )
        self.branch3 = nn.Sequential(
            *unit(in_channels, ch5x5red, 1),
            *unit(ch5x5red, ch5x5, 5, padding=2),
        )
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            *unit(in_channels, pool_proj, 1),
        )

    def forward(self, x):
        """Run all four branches on ``x`` and concatenate their outputs on dim 1."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        return torch.cat([branch(x) for branch in branches], dim=1)

In [8]:
# Model definition
class GoogLeNet(nn.Module):
    """Reduced GoogLeNet-style classifier with two Inception blocks.

    Pipeline: stem (7x7 stride-2 conv, ReLU, 3x3 stride-2 max-pool) ->
    ``inception1`` -> ``inception2`` -> global average pooling -> dropout ->
    linear classifier.

    Channel bookkeeping: the stem emits 64 channels, ``inception1`` emits
    64 + 128 + 32 + 32 = 256, and ``inception2`` emits
    128 + 192 + 96 + 64 = 480, which is the input width of ``fc``.

    Args:
        num_classes: Number of output logits (default 10).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        stem = [
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        ]
        self.pre_layers = nn.Sequential(*stem)
        self.inception1 = Inception(64, 64, 96, 128, 16, 32, 32)
        self.inception2 = Inception(256, 128, 128, 192, 32, 96, 64)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.4)
        self.fc = nn.Linear(480, num_classes)

    def forward(self, x):
        """Return class logits with one row per input sample."""
        features = self.inception2(self.inception1(self.pre_layers(x)))
        # Collapse the pooled (N, C, 1, 1) map into (N, C) for the classifier.
        pooled = self.avgpool(features).flatten(start_dim=1)
        return self.fc(self.dropout(pooled))

In [9]:
# Build the network on the selected device, then create the loss and the
# optimizer (SGD with momentum) over the parameters once they are on that device.
model = GoogLeNet()
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=0.01, momentum=0.9)

In [10]:
# Mean loss of each completed 100-batch window; appended to by train().
train_losses = []


# Training function
def train(epoch):
    """Run one training epoch over ``train_loader`` with the global model.

    Uses the module-level ``model``, ``criterion``, ``optimizer`` and
    ``device``. After every 100th batch the mean loss of that window is
    printed and appended to ``train_losses``; batches after the last full
    window of an epoch are trained on but not reported.

    Args:
        epoch: Zero-based epoch index, used only in the log line (shown as
            ``epoch + 1``).
    """
    model.train()
    window_loss = 0.0
    for batch_no, (inputs, labels) in enumerate(train_loader, start=1):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

        window_loss += loss.item()
        if batch_no % 100 != 0:
            continue

        # A 100-batch window just finished: report it and start a new one.
        avg_loss = window_loss / 100
        print(f'[Epoch {epoch + 1}, Batch {batch_no}] Loss: {avg_loss:.3f}')
        train_losses.append(avg_loss)
        window_loss = 0.0

In [11]:

# Evaluation function
def test():
    """Evaluate the global ``model`` on ``test_loader`` and report accuracy.

    Puts the model in eval mode, runs every batch of ``test_loader`` with
    gradient tracking disabled, and counts predictions that match the labels.
    The predicted class is the index of the largest logit in each row.

    Returns:
        float: Top-1 accuracy as a percentage in [0, 100]; also printed.

    Raises:
        ZeroDivisionError: If ``test_loader`` yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            # Gradients are already off here, so the legacy `.data` detour is
            # unnecessary; take the arg-max of the logits directly.
            predicted = torch.argmax(outputs, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'Accuracy on test set: {accuracy:.2f}%')
    return accuracy

In [12]:
def plot_train_loss():
    """Plot the recorded training losses and save them to ``train_loss_curve.png``.

    Reads the module-level ``train_losses`` list (one value per 100-batch
    window), draws it as a line chart, writes the image to the current working
    directory and closes the figure so nothing is left open afterwards.
    """
    # Explicit Figure/Axes objects ensure we save and close the figure that
    # was created here, not whichever figure pyplot considers current.
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(train_losses, label='Training Loss')
    ax.set_title('Training Loss Over Time')
    # The original label was missing its closing parenthesis.
    ax.set_xlabel('Iterations (per 100 batches)')
    ax.set_ylabel('Loss')
    ax.legend()
    ax.grid(True)
    fig.savefig('train_loss_curve.png')  # save as an image file
    plt.close(fig)
    print("Training loss curve has been saved as 'train_loss_curve.png'")

In [None]:
# Train for a fixed number of epochs; after each one, evaluate on the test set
# and checkpoint the weights whenever the accuracy beats the best seen so far.
num_epochs = 5
best_accuracy = 0.0
for epoch in range(num_epochs):
    train(epoch)
    accuracy = test()
    if not accuracy > best_accuracy:
        continue
    # New best score: remember it and persist the current weights.
    best_accuracy = accuracy
    torch.save(model.state_dict(), 'googlenet_cifar10_best.pth')

# Visualise the training loss
plot_train_loss()

print(f'训练完成！最佳测试准确率: {best_accuracy:.2f}%')


[Epoch 1, Batch 100] Loss: 2.300
[Epoch 1, Batch 200] Loss: 2.288
[Epoch 1, Batch 300] Loss: 2.220
[Epoch 1, Batch 400] Loss: 2.104
[Epoch 1, Batch 500] Loss: 2.088
[Epoch 1, Batch 600] Loss: 2.076
[Epoch 1, Batch 700] Loss: 2.040
Accuracy on test set: 26.61%
[Epoch 2, Batch 100] Loss: 2.005
[Epoch 2, Batch 200] Loss: 2.004
[Epoch 2, Batch 300] Loss: 1.990
[Epoch 2, Batch 400] Loss: 1.973
[Epoch 2, Batch 500] Loss: 1.947
[Epoch 2, Batch 600] Loss: 1.936
[Epoch 2, Batch 700] Loss: 1.917
Accuracy on test set: 31.66%
[Epoch 3, Batch 100] Loss: 1.885
[Epoch 3, Batch 200] Loss: 1.829
[Epoch 3, Batch 300] Loss: 1.820
[Epoch 3, Batch 400] Loss: 1.793
[Epoch 3, Batch 500] Loss: 1.778
[Epoch 3, Batch 600] Loss: 1.776
[Epoch 3, Batch 700] Loss: 1.737
Accuracy on test set: 34.77%
[Epoch 4, Batch 100] Loss: 1.722
[Epoch 4, Batch 200] Loss: 1.718
[Epoch 4, Batch 300] Loss: 1.725
[Epoch 4, Batch 400] Loss: 1.722
[Epoch 4, Batch 500] Loss: 1.727
[Epoch 4, Batch 600] Loss: 1.689
[Epoch 4, Batch 700] L