In [1]:
import time
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
import pickle
import json
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

Определяем функции для обучения одной эпохи, оценки модели (с общей и per-class accuracy) и полного цикла обучения с сохранением лучших весов

In [2]:
SEED = 42
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

DATA_ROOT = 'C:\\IMPORTANT\\NSU\\3\\ML\\course_work\\.data_cifar10'

DOWNLOAD = True
cifar10_path = os.path.join(DATA_ROOT, 'cifar-10-batches-py')
if os.path.exists(cifar10_path):
    DOWNLOAD = False

train_tf_32 = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616)),
])

test_tf_32 = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616)),
])

train_ds_32 = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=True, transform=train_tf_32, download=DOWNLOAD)
test_ds_32  = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=False, transform=test_tf_32, download=DOWNLOAD)

SUBSET_TRAIN = 10000
SUBSET_TEST  = 2000

def make_subset(ds, n=None, seed=SEED):
    if n is None or n >= len(ds):
        return ds
    idx = np.random.RandomState(seed).choice(len(ds), size=n, replace=False)
    return Subset(ds, idx)

train_ds_32_q = make_subset(train_ds_32, SUBSET_TRAIN)
test_ds_32_q  = make_subset(test_ds_32,  SUBSET_TEST)

BATCH_SIZE = 128 if DEVICE.type == 'cuda' else 64

train_ld_32 = DataLoader(train_ds_32_q, batch_size=BATCH_SIZE, shuffle=True)
test_ld_32  = DataLoader(test_ds_32_q,  batch_size=BATCH_SIZE, shuffle=False)

NUM_CLASSES = 10
print(f'DEVICE={DEVICE.type} | 32x32: train={len(train_ds_32_q)}, test={len(test_ds_32_q)}')

DEVICE=cpu | 32x32: train=10000, test=2000


In [3]:
def train_one_epoch(model, loader, criterion, optimizer, device):
    model.train()
    running = 0.0
    for images, targets in loader:
        images, targets = images.to(device), targets.to(device)
        optimizer.zero_grad()
        logits = model(images)
        loss = criterion(logits, targets)
        loss.backward()
        optimizer.step()
        running += loss.item()
    return running / max(1, len(loader))

@torch.no_grad()
def evaluate(model, loader, device, num_classes=10):
    model.eval()
    correct, total = 0, 0
    per_class_correct = np.zeros(num_classes, dtype=np.int64)
    per_class_total = np.zeros(num_classes, dtype=np.int64)
    for images, targets in loader:
        images, targets = images.to(device), targets.to(device)
        logits = model(images)
        preds = logits.argmax(dim=1)
        correct += (preds == targets).sum().item()
        total += targets.size(0)
        t_np = targets.cpu().numpy()
        p_np = preds.cpu().numpy()
        for t, p in zip(t_np, p_np):
            per_class_total[t] += 1
            per_class_correct[t] += int(t == p)
    acc = correct / max(1, total)
    per_class_acc = (per_class_correct / np.maximum(1, per_class_total)).tolist()
    return acc, per_class_acc

def fit_model(model, train_loader, val_loader, epochs=5, lr=1e-3, device=torch.device('cpu'), name='model', num_classes=10):
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    best_acc = 0.0
    best_path = f'{name}_best.pth'
    last_path = f'{name}_last.pth'

    for epoch in range(1, epochs+1):
        t0 = time.time()
        tr_loss = train_one_epoch(model, train_loader, criterion, optimizer, device)
        val_acc, _ = evaluate(model, val_loader, device, num_classes=num_classes)
        dt = time.time() - t0
        print(f'Epoch {epoch}/{epochs} | train_loss={tr_loss:.4f} | val_acc={val_acc:.4f} | {int(dt)}s')

        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), best_path)

        torch.save(model.state_dict(), last_path)

    best_model = type(model)()
    if hasattr(best_model, 'classifier') or hasattr(best_model, 'features'):
        try:
            best_model = type(model)(num_classes=num_classes)
        except:
            pass
    best_model = best_model.to(device)
    best_model.load_state_dict(torch.load(best_path, map_location=device))
    final_acc, per_class_acc = evaluate(best_model, val_loader, device, num_classes=num_classes)
    print(f'Best val_acc(saved)={best_acc:.4f} | Reloaded val_acc={final_acc:.4f}')
    print(f'Weights saved: best -> {best_path}, last -> {last_path}')
    return best_path, last_path, final_acc, per_class_acc

Используем простой сверточный классификатор под 32×32 (несколько блоков conv–relu–maxpool + полносвязная голова). Обучаем модель напрямую на изображениях без промежуточных признаков, сохраняем лучшие веса, печатаем accuracy по классам

На cpu учимся около 11 минут

In [4]:
class SimpleCNN(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1), nn.ReLU(inplace=True), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(inplace=True), nn.MaxPool2d(2),
            nn.Conv2d(128, 256, 3, padding=1), nn.ReLU(inplace=True), nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256*4*4, 512), nn.ReLU(inplace=True), nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )
    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

EPOCHS = 50
model_simple = SimpleCNN(num_classes=NUM_CLASSES)
path_simple_best, path_simple_last, acc_simple, pc_simple = fit_model(
    model_simple, train_ld_32, test_ld_32,
    epochs=EPOCHS, lr=1e-3, device=DEVICE,
    name='simplecnn32', num_classes=NUM_CLASSES
)
print(f'SimpleCNN: acc={acc_simple:.4f}')
print('Per-class accuracy:')
for i, v in enumerate(pc_simple):
    print(f'  class {i}: {v:.4f}')

Epoch 1/50 | train_loss=1.9303 | val_acc=0.4135 | 11s
Epoch 2/50 | train_loss=1.6213 | val_acc=0.4630 | 11s
Epoch 3/50 | train_loss=1.4847 | val_acc=0.4915 | 11s
Epoch 4/50 | train_loss=1.3848 | val_acc=0.5445 | 11s
Epoch 5/50 | train_loss=1.2894 | val_acc=0.5955 | 12s
Epoch 6/50 | train_loss=1.2088 | val_acc=0.6015 | 13s
Epoch 7/50 | train_loss=1.1245 | val_acc=0.6370 | 13s
Epoch 8/50 | train_loss=1.0775 | val_acc=0.6395 | 13s
Epoch 9/50 | train_loss=1.0334 | val_acc=0.6655 | 13s
Epoch 10/50 | train_loss=0.9726 | val_acc=0.6700 | 13s
Epoch 11/50 | train_loss=0.9500 | val_acc=0.6920 | 13s
Epoch 12/50 | train_loss=0.8890 | val_acc=0.6965 | 13s
Epoch 13/50 | train_loss=0.8741 | val_acc=0.6975 | 13s
Epoch 14/50 | train_loss=0.8143 | val_acc=0.7040 | 13s
Epoch 15/50 | train_loss=0.8213 | val_acc=0.7155 | 13s
Epoch 16/50 | train_loss=0.7807 | val_acc=0.7185 | 13s
Epoch 17/50 | train_loss=0.7499 | val_acc=0.7335 | 12s
Epoch 18/50 | train_loss=0.7395 | val_acc=0.7215 | 13s
Epoch 19/50 | train

  best_model.load_state_dict(torch.load(best_path, map_location=device))


Best val_acc(saved)=0.7585 | Reloaded val_acc=0.7585
Weights saved: best -> simplecnn32_best.pth, last -> simplecnn32_last.pth
SimpleCNN: acc=0.7585
Per-class accuracy:
  class 0: 0.8182
  class 1: 0.8677
  class 2: 0.6318
  class 3: 0.6364
  class 4: 0.7293
  class 5: 0.6859
  class 6: 0.7700
  class 7: 0.8097
  class 8: 0.8079
  class 9: 0.8199


Собираем resnet18, адаптированный под 32×32 (уменьшаем первый слой до 3×3 stride 1, убираем maxpool, меняем fc на 10 классов), и обучаем модель напрямую на изображениях. Сохраняем лучшие веса и печатаем метрики по классам

Resnet'у нужно на те же 50 эпох уже около полутора часов, поэтому его лучше не обучать заново, просто подгрузить через pickle уже полученные веса

In [5]:
def make_resnet(num_classes=10):
    m = torchvision.models.resnet18(weights=None)
    m.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
    m.maxpool = nn.Identity()
    m.fc = nn.Linear(m.fc.in_features, num_classes)
    return m

def train_one_epoch(model, loader, criterion, optimizer, device):
    model.train()
    running = 0.0
    for images, targets in loader:
        images, targets = images.to(device), targets.to(device)
        optimizer.zero_grad()
        logits = model(images)
        loss = criterion(logits, targets)
        loss.backward()
        optimizer.step()
        running += loss.item()
    return running / max(1, len(loader))

@torch.no_grad()
def evaluate(model, loader, device, num_classes=10):
    model.eval()
    correct, total = 0, 0
    per_class_correct = np.zeros(num_classes, dtype=np.int64)
    per_class_total = np.zeros(num_classes, dtype=np.int64)
    for images, targets in loader:
        images, targets = images.to(device), targets.to(device)
        logits = model(images)
        preds = logits.argmax(dim=1)
        correct += (preds == targets).sum().item()
        total += targets.size(0)
        t_np = targets.cpu().numpy()
        p_np = preds.cpu().numpy()
        for t, p in zip(t_np, p_np):
            per_class_total[t] += 1
            per_class_correct[t] += int(t == p)
    acc = correct / max(1, total)
    per_class_acc = (per_class_correct / np.maximum(1, per_class_total)).tolist()
    return acc, per_class_acc

def fit_model(model, train_loader, val_loader, epochs=5, lr=1e-3, device=torch.device('cpu'), name='model', num_classes=10):
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    best_acc = 0.0
    best_path = f'{name}_best.pth'
    last_path = f'{name}_last.pth'

    for epoch in range(1, epochs+1):
        t0 = time.time()
        tr_loss = train_one_epoch(model, train_loader, criterion, optimizer, device)
        val_acc, _ = evaluate(model, val_loader, device, num_classes=num_classes)
        dt = time.time() - t0
        print(f'Epoch {epoch}/{epochs} | train_loss={tr_loss:.4f} | val_acc={val_acc:.4f} | {int(dt)}s')

        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), best_path)
        torch.save(model.state_dict(), last_path)

    model.load_state_dict(torch.load(best_path, map_location=device))
    final_acc, per_class_acc = evaluate(model, val_loader, device, num_classes=num_classes)
    print(f'Best val_acc(saved)={best_acc:.4f} | Reloaded val_acc={final_acc:.4f}')
    print(f'Weights saved: best -> {best_path}, last -> {last_path}')
    return best_path, last_path, final_acc, per_class_acc

EPOCHS = 50
model_resnet = make_resnet(num_classes=NUM_CLASSES)
path_resnet_best, path_resnet_last, acc_resnet, pc_resnet = fit_model(
    model_resnet, train_ld_32, test_ld_32,
    epochs=EPOCHS, lr=1e-3, device=DEVICE,
    name='resnet', num_classes=NUM_CLASSES
)
print(f'ResNet: val_acc={acc_resnet:.4f}')
print('Per-class accuracy:')
for i, v in enumerate(pc_resnet):
    print(f'  class {i}: {v:.4f}')

Epoch 1/50 | train_loss=1.7947 | val_acc=0.3850 | 102s
Epoch 2/50 | train_loss=1.4689 | val_acc=0.4785 | 97s
Epoch 3/50 | train_loss=1.2759 | val_acc=0.5135 | 97s
Epoch 4/50 | train_loss=1.1419 | val_acc=0.5480 | 97s
Epoch 5/50 | train_loss=1.0282 | val_acc=0.6050 | 98s
Epoch 6/50 | train_loss=0.9497 | val_acc=0.5585 | 102s
Epoch 7/50 | train_loss=0.8804 | val_acc=0.6540 | 104s
Epoch 8/50 | train_loss=0.8077 | val_acc=0.6465 | 112s
Epoch 9/50 | train_loss=0.7632 | val_acc=0.6995 | 210s
Epoch 10/50 | train_loss=0.7050 | val_acc=0.7035 | 110s
Epoch 11/50 | train_loss=0.6586 | val_acc=0.7195 | 110s
Epoch 12/50 | train_loss=0.6056 | val_acc=0.7415 | 189s
Epoch 13/50 | train_loss=0.5903 | val_acc=0.7195 | 194s
Epoch 14/50 | train_loss=0.5397 | val_acc=0.7390 | 208s
Epoch 15/50 | train_loss=0.5342 | val_acc=0.7305 | 103s
Epoch 16/50 | train_loss=0.4784 | val_acc=0.7705 | 98s
Epoch 17/50 | train_loss=0.4570 | val_acc=0.7395 | 98s
Epoch 18/50 | train_loss=0.4227 | val_acc=0.7685 | 97s
Epoch 19

  model.load_state_dict(torch.load(best_path, map_location=device))


Best val_acc(saved)=0.8305 | Reloaded val_acc=0.8305
Weights saved: best -> resnet_best.pth, last -> resnet_last.pth
ResNet: val_acc=0.8305
Per-class accuracy:
  class 0: 0.8770
  class 1: 0.8730
  class 2: 0.7562
  class 3: 0.6667
  class 4: 0.8232
  class 5: 0.7644
  class 6: 0.8263
  class 7: 0.8805
  class 8: 0.8768
  class 9: 0.9479


После обучения cnn и resnet18 извлекаем эмбеддинги (последние скрытые представления до классификатора) на train и test, и сохраняем их

In [6]:
@torch.no_grad()
def extract_simplecnn_features(ckpt_path, train_loader, test_loader, num_classes=10, device=DEVICE):
    model = SimpleCNN(num_classes=num_classes).to(device)
    model.load_state_dict(torch.load(ckpt_path, map_location=device))
    model.eval()

    def run_loader(loader):
        feats, labels = [], []
        for images, targets in loader:
            images = images.to(device)
            # фичи до classifier: [B,256,4,4] -> [B,4096]
            f = model.features(images)
            f = torch.flatten(f, 1)
            feats.append(f.cpu().numpy())
            labels.append(targets.numpy())
        X = np.concatenate(feats, axis=0)
        y = np.concatenate(labels, axis=0)
        return X, y

    X_train, y_train = run_loader(train_loader)
    X_test,  y_test  = run_loader(test_loader)
    return X_train, y_train, X_test, y_test


@torch.no_grad()
def extract_resnet_features(ckpt_path, train_loader, test_loader, num_classes=10, device=DEVICE):
    model = make_resnet(num_classes=num_classes).to(device)
    model.load_state_dict(torch.load(ckpt_path, map_location=device))
    model.eval()

    # обрезаем голову: всё до avgpool
    feature_extractor = nn.Sequential(*(list(model.children())[:-1])).to(device).eval()

    def run_loader(loader):
        feats, labels = [], []
        for images, targets in loader:
            images = images.to(device)
            f = feature_extractor(images)   # [B, 512, 1, 1]
            f = torch.flatten(f, 1)         # [B, 512]
            feats.append(f.cpu().numpy())
            labels.append(targets.numpy())
        X = np.concatenate(feats, axis=0)
        y = np.concatenate(labels, axis=0)
        return X, y

    X_train, y_train = run_loader(train_loader)
    X_test,  y_test  = run_loader(test_loader)
    return X_train, y_train, X_test, y_test

# извлекаем эмбеддинги
Xtr_s, ytr_s, Xte_s, yte_s = extract_simplecnn_features(
    path_simple_best, train_ld_32, test_ld_32, num_classes=NUM_CLASSES, device=DEVICE
)
with open('embeddings_simplecnn_train.pkl', 'wb') as f:
    pickle.dump({'X': Xtr_s, 'y': ytr_s}, f)
with open('embeddings_simplecnn_test.pkl', 'wb') as f:
    pickle.dump({'X': Xte_s, 'y': yte_s}, f)
print('Saved: embeddings_simplecnn_train.pkl, embeddings_simplecnn_test.pkl')
print('SimpleCNN shapes:', Xtr_s.shape, Xte_s.shape)

Xtr_r, ytr_r, Xte_r, yte_r = extract_resnet_features(
    path_resnet_best, train_ld_32, test_ld_32, num_classes=NUM_CLASSES, device=DEVICE
)
with open('embeddings_resnet_train.pkl', 'wb') as f:
    pickle.dump({'X': Xtr_r, 'y': ytr_r}, f)
with open('embeddings_resnet_test.pkl', 'wb') as f:
    pickle.dump({'X': Xte_r, 'y': yte_r}, f)
print('Saved: embeddings_resnet_train.pkl, embeddings_resnet_test.pkl')
print('ResNet shapes:', Xtr_r.shape, Xte_r.shape)

  model.load_state_dict(torch.load(ckpt_path, map_location=device))


Saved: embeddings_simplecnn_train.pkl, embeddings_simplecnn_test.pkl
SimpleCNN shapes: (10000, 4096) (2000, 4096)


  model.load_state_dict(torch.load(ckpt_path, map_location=device))


Saved: embeddings_resnet_train.pkl, embeddings_resnet_test.pkl
ResNet shapes: (10000, 512) (2000, 512)


In [7]:
@torch.no_grad()
def predict_loader(model, loader, device):
    model.eval()
    ys, ps = [], []
    for images, targets in loader:
        images = images.to(device)
        logits = model(images)
        pred = logits.argmax(dim=1).cpu().numpy()
        ys.append(targets.numpy())
        ps.append(pred)
    y_true = np.concatenate(ys)
    y_pred = np.concatenate(ps)
    return y_true, y_pred

simple_model = SimpleCNN(num_classes=NUM_CLASSES).to(DEVICE)
simple_model.load_state_dict(torch.load(path_simple_best, map_location=DEVICE))
y_true_s, y_pred_s = predict_loader(simple_model, test_ld_32, DEVICE)
acc_s = accuracy_score(y_true_s, y_pred_s)
p_s, r_s, f_s, _ = precision_recall_fscore_support(y_true_s, y_pred_s, average='macro', zero_division=0)

resnet_model = make_resnet(num_classes=NUM_CLASSES).to(DEVICE)
resnet_model.load_state_dict(torch.load(path_resnet_best, map_location=DEVICE))
y_true_r, y_pred_r = predict_loader(resnet_model, test_ld_32, DEVICE)
acc_r = accuracy_score(y_true_r, y_pred_r)
p_r, r_r, f_r, _ = precision_recall_fscore_support(y_true_r, y_pred_r, average='macro', zero_division=0)

dl_metrics = {
    'SimpleCNN': {
        'accuracy': float(acc_s),
        'precision_macro': float(p_s),
        'recall_macro': float(r_s),
        'f1_macro': float(f_s),
    },
    'ResNet': {
        'accuracy': float(acc_r),
        'precision_macro': float(p_r),
        'recall_macro': float(r_r),
        'f1_macro': float(f_r),
    }
}
with open('dl_baselines_metrics.json', 'w') as f:
    json.dump(dl_metrics, f)
print('Saved DL baselines -> dl_baselines_metrics.json')

  simple_model.load_state_dict(torch.load(path_simple_best, map_location=DEVICE))
  resnet_model.load_state_dict(torch.load(path_resnet_best, map_location=DEVICE))


Saved DL baselines -> dl_baselines_metrics.json
