Бейзлайн (датасет CIFAR10)

In [None]:
import torch
import torchvision
import torchvision.transforms as T
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
import numpy as np
from tqdm import tqdm

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Using device:", device)

transform = T.Compose([
    T.ToTensor(),
    T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

batch_size = 128

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

num_classes = 10

resnet = torchvision.models.resnet18(weights=None, num_classes=num_classes)
resnet.to(device)

swin = torchvision.models.swin_v2_t(weights=None, num_classes=num_classes)
swin.to(device)

models = {"ResNet18": resnet}
models["SwinV2Tiny"] = swin


def train_epoch(model, loader, loss_fn, optimizer, device):
    model.train()
    total_loss = 0
    for x, y in tqdm(loader, leave=False):
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        out = model(x)
        loss = loss_fn(out, y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * x.size(0)
    return total_loss / len(loader.dataset)

@torch.no_grad()
def eval_metrics(model, loader, device):
    model.eval()
    y_true, y_pred = [], []
    for x, y in loader:
        x = x.to(device)
        preds = model(x).argmax(1).cpu().numpy()
        y_true.extend(y.numpy())
        y_pred.extend(preds)
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='macro')
    cm = confusion_matrix(y_true, y_pred)
    return acc, f1, cm


results = {}
num_epochs = 10

for name, model in models.items():
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        avg_loss = train_epoch(model, trainloader, criterion, optimizer, device)

    acc, f1, cm = eval_metrics(model, testloader, device)
    results[name] = (acc, f1, cm)


print("\n=== Baseline Results ===")
for name, (acc, f1, cm) in results.items():
    print(f"{name}: Accuracy={acc:.4f}, F1={f1:.4f}")

Using device: cuda

=== Baseline Results ===
ResNet18: Accuracy=0.7570, F1=0.7576
SwinV2Tiny: Accuracy=0.6184, F1=0.6125



Улучшение бейзлайна

In [7]:
import torch
import torchvision
import torchvision.transforms as T
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
import numpy as np
from tqdm import tqdm

improve_transform = T.Compose([
    T.RandomCrop(32, padding=4),
    T.RandomHorizontalFlip(),
    T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    T.ToTensor(),
    T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    T.RandomErasing(p=0.3)
])

test_transform = T.Compose([
    T.ToTensor(),
    T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

batch_size = 128

trainset_aug = torchvision.datasets.CIFAR10(root='./data', train=True, download=False, transform=improve_transform)
trainloader_aug = DataLoader(trainset_aug, batch_size=batch_size, shuffle=True, num_workers=2)

testset_aug = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=test_transform)
testloader_aug = DataLoader(testset_aug, batch_size=batch_size, shuffle=False, num_workers=2)

num_classes = 10

resnet = torchvision.models.resnet18(weights=None, num_classes=num_classes).to(device)
swin = torchvision.models.swin_v2_t(weights=None, num_classes=num_classes).to(device)

models = {"ResNet18": resnet, "SwinV2Tiny": swin}

results = {}
num_epochs = 10

for name, model in models.items():
    optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.2)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        avg_loss = train_epoch(model, trainloader_aug, criterion, optimizer, device)
        scheduler.step()
    acc, f1, cm = eval_metrics(model, testloader_aug, device)
    results[name] = (acc, f1, cm)


print("\n=== Improved Baseline Results ===")
for name, (acc, f1, cm) in results.items():
    print(f"{name}: Accuracy={acc:.4f}, F1={f1:.4f}")




=== Improved Baseline Results ===
ResNet18: Accuracy=0.7378, F1=0.7358
SwinV2Tiny: Accuracy=0.1000, F1=0.0182


Собственная реализация

In [8]:
class SimpleCNN(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1), nn.ReLU(), nn.BatchNorm2d(32),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.BatchNorm2d(64),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(), nn.BatchNorm2d(128),
            nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128*8*8, 256), nn.ReLU(), nn.Dropout(0.3),
            nn.Linear(256, num_classes)
        )
    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x


class TinyViT(nn.Module):
    def __init__(self, num_classes=10, img_size=32, patch_size=4, dim=64, num_layers=2, num_heads=4):
        super().__init__()
        num_patches = (img_size // patch_size) ** 2
        self.patch_embed = nn.Conv2d(3, dim, kernel_size=patch_size, stride=patch_size)
        encoder_layer = nn.TransformerEncoderLayer(d_model=dim, nhead=num_heads, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.cls_token = nn.Parameter(torch.zeros(1, 1, dim))
        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, dim))
        self.mlp_head = nn.Linear(dim, num_classes)
    def forward(self, x):
        B = x.size(0)
        x = self.patch_embed(x).flatten(2).transpose(1,2)
        cls_tokens = self.cls_token.expand(B, -1, -1)
        x = torch.cat([cls_tokens, x], dim=1)
        x = x + self.pos_embed
        x = self.transformer(x)
        x = x[:,0]
        x = self.mlp_head(x)
        return x


models_my = {
    "SimpleCNN": SimpleCNN(num_classes).to(device),
    "TinyViT": TinyViT(num_classes).to(device)
}
results_my = {}
for name, model in models_my.items():
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    for epoch in range(num_epochs):
        train_epoch(model, trainloader, nn.CrossEntropyLoss(), optimizer, device)
    acc, f1, cm = eval_metrics(model, testloader, device)
    results_my[name] = (acc, f1, cm)

print("\n=== Self implementation ===")
for name, (acc, f1, cm) in results_my.items():
    print(f"{name}: Accuracy={acc:.4f}, F1={f1:.4f}")




=== Self implementation ===
SimpleCNN: Accuracy=0.7843, F1=0.7820
TinyViT: Accuracy=0.6782, F1=0.6782


Улучшение собственной реализации

In [None]:
improve_transform = T.Compose([
    T.RandomCrop(32, padding=4),
    T.RandomHorizontalFlip(),
    T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    T.ToTensor(),
    T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    T.RandomErasing(p=0.3)
])

trainset_aug = torchvision.datasets.CIFAR10(root='./data', train=True, download=False, transform=improve_transform)
trainloader_aug = DataLoader(trainset_aug, batch_size=batch_size, shuffle=True, num_workers=2)

models_my_aug = {
    "SimpleCNN": SimpleCNN(num_classes).to(device),
    "TinyViT": TinyViT(num_classes).to(device)
}
results_my_aug = {}
for name, model in models_my_aug.items():
    optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.2)
    for epoch in range(10):
        train_epoch(model, trainloader_aug, nn.CrossEntropyLoss(), optimizer, device)
        scheduler.step()
    acc, f1, cm = eval_metrics(model, testloader, device)
    results_my_aug[name] = (acc, f1, cm)

print("\n=== Self implementation (improved) ===")
for name, (acc, f1, cm) in results_my_aug.items():
    print(f"{name}: Accuracy={acc:.4f}, F1={f1:.4f}")


=== Self implementation (improved) ===
SimpleCNN: Accuracy=0.7991, F1=0.7911
TinyViT: Accuracy=0.6806, F1=0.6405

