In [1]:
import torch
from torchvision.datasets import CIFAR10
from torchvision import transforms
from torch.utils.data import DataLoader, random_split
import torch.nn as nn
from torchinfo import summary
import torch.nn.functional as F

In [2]:
# config
config = {
    'num_classes': 10,
    'batch_size': 512,
    'n_epochs': 10,
    'lr': 1e-3,
    'seed': 42,
    'val_ratio': 0.2
}

In [8]:
# trainer
def accuracy(pred, y):
    return sum(torch.argmax(pred, dim=1) == y) / len(y)

def train_step(model, train_loader, criterion, optimizer, device):
    model.train()
    loss_record = []
    acc_record = []
    for X, y in train_loader:
        optimizer.zero_grad()
        y_one_hot = torch.tensor(F.one_hot(y, num_classes=config['num_classes']), dtype=torch.float32)
        X, y, y_one_hot = X.to(device), y.to(device), y_one_hot.to(device)
        pred = model(X)        
        acc = accuracy(pred, y)
        acc_record.append(acc)
        loss = criterion(pred, y_one_hot)
        loss.backward()
        optimizer.step()
        loss_record.append(loss.detach().item())
    mean_train_loss = sum(loss_record) / len(loss_record)
    mean_train_acc = sum(acc_record) / len(acc_record)
    return mean_train_loss, mean_train_acc

def val_step(model, valid_loader, criterion, device):
    model.eval()
    loss_record = []
    acc_record = []
    with torch.no_grad():
        for X, y in valid_loader:
            y_one_hot = torch.tensor(F.one_hot(y, num_classes=config['num_classes']), dtype=torch.float32)
            X, y, y_one_hot = X.to(device), y.to(device), y_one_hot.to(device)
            pred = model(X)
            acc = accuracy(pred, y)
            acc_record.append(acc)
            loss = criterion(pred, y_one_hot)
            loss_record.append(loss.detach().item())
    mean_eval_loss = sum(loss_record) / len(loss_record)
    mean_eval_acc = sum(acc_record) / len(acc_record)
    return mean_eval_loss, mean_eval_acc

def train_model(model, train_loader, valid_loader, criterion, optimizer, n_epochs, device):
    for epoch in range(n_epochs):
        # train
        mean_train_loss, mean_train_acc = train_step(model, train_loader, criterion, optimizer, device)
        # validation
        mean_eval_loss, mean_eval_acc = val_step(model, valid_loader, criterion, device)
        print(f"Epoch [{epoch+1}/{n_epochs}]")
        print(f"Train: loss={mean_train_loss:.4f}, acc={mean_train_acc:.4f}")
        print(f"Val: loss={mean_eval_loss:.4f}, acc={mean_eval_acc:.4f}")

In [4]:
# 加载训练集和测试集
def get_dataloader(batch_size=128, val_ratio=0.2, seed=42, num_workers=4):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    dataset = CIFAR10(root='./datasets', train=True, download=False, transform=transform)
    val_size = int(val_ratio * len(dataset))
    train_size = len(dataset) - val_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size], generator=torch.Generator().manual_seed(seed))
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    return train_dataloader, val_dataloader

train_dataloader, val_dataloader = get_dataloader(batch_size=config['batch_size'], val_ratio=config['val_ratio'], seed=config['seed'])

In [5]:
for X, y in train_dataloader:
    print("Shape of X [N, C, H, W]: ", X.shape)
    print("Shape of y: ", y.shape, y.dtype)
    break

Shape of X [N, C, H, W]:  torch.Size([256, 3, 32, 32])
Shape of y:  torch.Size([256]) torch.int64


In [6]:
class SimpleConv(nn.Module):
    def __init__(self, in_channels, n_classes=10, conv_layers=[32, 64, 256, 512, 1024]):
        super().__init__()
        layers = []
        for i in range(len(conv_layers)):
            if i == 0:
                layers.append(nn.Conv2d(in_channels, conv_layers[i], 3, stride=1, padding=1))
            else:
                layers.append(nn.Conv2d(conv_layers[i-1], conv_layers[i], 3, stride=1, padding=1))
            layers.append(nn.ReLU(inplace=True))
            layers.append(nn.MaxPool2d(2))
        self.layers = nn.Sequential(*layers)
        self.fc = nn.Linear(conv_layers[-1], n_classes)
        self.softmax = nn.Softmax(dim=-1)
        
    def forward(self, x):
        x = self.layers(x)
        x = self.fc(x.flatten(1))
        x = self.softmax(x)
        return x

model = SimpleConv(in_channels=3, n_classes=config['num_classes'])
summary(model, (1, 3, 32, 32))

Layer (type:depth-idx)                   Output Shape              Param #
SimpleConv                               [1, 10]                   --
├─Sequential: 1-1                        [1, 1024, 1, 1]           --
│    └─Conv2d: 2-1                       [1, 32, 32, 32]           896
│    └─ReLU: 2-2                         [1, 32, 32, 32]           --
│    └─MaxPool2d: 2-3                    [1, 32, 16, 16]           --
│    └─Conv2d: 2-4                       [1, 64, 16, 16]           18,496
│    └─ReLU: 2-5                         [1, 64, 16, 16]           --
│    └─MaxPool2d: 2-6                    [1, 64, 8, 8]             --
│    └─Conv2d: 2-7                       [1, 256, 8, 8]            147,712
│    └─ReLU: 2-8                         [1, 256, 8, 8]            --
│    └─MaxPool2d: 2-9                    [1, 256, 4, 4]            --
│    └─Conv2d: 2-10                      [1, 512, 4, 4]            1,180,160
│    └─ReLU: 2-11                        [1, 512, 4, 4]            -

In [9]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
train_model(model, train_dataloader, val_dataloader, criterion, optimizer, 10, device)

  y_one_hot = torch.tensor(F.one_hot(y, num_classes=config['num_classes']), dtype=torch.float32)
  y_one_hot = torch.tensor(F.one_hot(y, num_classes=config['num_classes']), dtype=torch.float32)


Epoch [1/10]
Train: loss=2.1397, acc=0.3075 | Val: loss=2.0699, acc=0.3859
Epoch [2/10]
Train: loss=1.9914, acc=0.4657 | Val: loss=1.9872, acc=0.4693
Epoch [3/10]
Train: loss=1.9151, acc=0.5427 | Val: loss=1.8703, acc=0.5891
Epoch [4/10]
Train: loss=1.8529, acc=0.6066 | Val: loss=1.8413, acc=0.6147
Epoch [5/10]
Train: loss=1.8125, acc=0.6475 | Val: loss=1.8223, acc=0.6361
Epoch [6/10]
Train: loss=1.7771, acc=0.6842 | Val: loss=1.7931, acc=0.6659
Epoch [7/10]
Train: loss=1.7480, acc=0.7122 | Val: loss=1.7765, acc=0.6812
Epoch [8/10]
Train: loss=1.7196, acc=0.7410 | Val: loss=1.7536, acc=0.7047
Epoch [9/10]
Train: loss=1.7065, acc=0.7538 | Val: loss=1.7667, acc=0.6936
Epoch [10/10]
Train: loss=1.6834, acc=0.7778 | Val: loss=1.7443, acc=0.7171
