# Обучение моделей в PyTorch, CVAT, обзоры бэкбонов для задач компьютерного зрения

In [None]:
# В Colab пакет accelerate может быть не установлен — в этом случае раскомментируйте строку ниже
# !pip install accelerate

In [None]:
import os
import random
from os.path import join as pjoin
from shutil import rmtree

import albumentations
import numpy as np
import torch
from accelerate import Accelerator
from albumentations.pytorch.transforms import ToTensorV2
from dataset import CustomVOCSegmentation
from matplotlib import pyplot as plt
from PIL import Image
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets
from train import (
    CheckpointSaver,
    IoUMetric,
    MulticlassCrossEntropyLoss,
    MulticlassDiceLoss,
    load_checkpoint,
    train,
)
from unet import UNet, count_model_params

In [None]:
def seed_everything(seed: int = 314159, torch_deterministic: bool = False) -> None:
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and every CUDA device), then switches PyTorch's deterministic
    algorithm mode on or off.

    Args:
        seed: Value used for all generators.
        torch_deterministic: Forwarded to
            ``torch.use_deterministic_algorithms``. When true, the settings
            that deterministic CUDA execution additionally depends on
            (cuBLAS workspace configuration, cuDNN autotuner off) are
            applied as well.
    """
    # The hash seed of the running interpreter is fixed at start-up; this
    # value is only picked up by child processes launched afterwards.
    os.environ["PYTHONHASHSEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Safe to call without CUDA; covers every visible GPU, not just the current one.
    torch.cuda.manual_seed_all(seed)

    if torch_deterministic:
        # Deterministic cuBLAS kernels require a fixed workspace configuration;
        # without it some CUDA ops raise once deterministic mode is enabled.
        # setdefault keeps a value the user has already exported.
        os.environ.setdefault("CUBLAS_WORKSPACE_CONFIG", ":4096:8")
        # The cuDNN autotuner may select different kernels from run to run.
        torch.backends.cudnn.benchmark = False

    torch.use_deterministic_algorithms(torch_deterministic)


seed_everything(42, torch_deterministic=False)

## Dataset

Набор данных Pascal VOC.

Сайт: http://host.robots.ox.ac.uk/pascal/VOC/

Лидерборд за 2012 год: http://host.robots.ox.ac.uk:8080/leaderboard/displaylb_main.php?challengeid=11&compid=5

In [None]:
# Side length, in pixels, of the square crop produced by the pipeline below.
IMAGE_SIZE = 256

# Spatial steps: pad up to IMAGE_SIZE where needed, crop to IMAGE_SIZE x IMAGE_SIZE,
# then flip horizontally with probability 0.5.
_spatial_steps = [
    albumentations.PadIfNeeded(min_height=IMAGE_SIZE, min_width=IMAGE_SIZE, p=1),
    albumentations.CropNonEmptyMaskIfExists(height=IMAGE_SIZE, width=IMAGE_SIZE),
    albumentations.HorizontalFlip(p=0.5),
]

# Optional augmentations to experiment with; to enable one, add it to
# `_spatial_steps` so that it still runs before normalisation:
#     albumentations.AdvancedBlur(p=0.5),
#     albumentations.GaussNoise(p=0.5),
#     albumentations.CLAHE(p=0.5),
#     albumentations.RandomBrightnessContrast(p=0.5),
#     albumentations.RandomGamma(p=0.5),
#     albumentations.ColorJitter(p=0.5),

# Final steps: normalise with the library defaults and convert to a torch tensor.
_output_steps = [
    albumentations.Normalize(),
    ToTensorV2(),
]

transforms = albumentations.Compose([*_spatial_steps, *_output_steps])

In [None]:
# запустите, если датасет не скачан

# dataset = datasets.VOCSegmentation(
#     root="data",
#     year="2012",
#     image_set="trainval",
#     download=True,
#     transforms=transforms,
# )

In [None]:
# Arguments common to both splits; only `image_set` differs between them.
# NOTE(review): the validation split reuses the same `transforms`, including the
# random crop and flip — confirm this is intended.
_voc_common_kwargs = dict(
    root="data",
    year="2012",
    download=False,
    transform=transforms,
)

train_dataset = CustomVOCSegmentation(image_set="train", **_voc_common_kwargs)
val_dataset = CustomVOCSegmentation(image_set="val", **_voc_common_kwargs)

## Обучение модели

См. `train.py`

In [None]:
# Backbone identifier passed to UNet(backbone_name=...).
BACKBONE_NAME = "resnet18"

# AdamW learning rate and weight decay.
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 1e-4
# Batch size and worker count used by both DataLoaders.
BATCH_SIZE = 16
NUM_WORKERS = 4
# Number of epochs passed to train(epoch_num=...).
EPOCH_NUM = 50
# Directory handed to CheckpointSaver(save_dir=...); the best checkpoint is later loaded from it.
CHECKPOINTS_DIR = "checkpoints"
# Log directory for the TensorBoard SummaryWriter.
TENSORBOARD_DIR = "tensorboard"
# Forwarded to CheckpointSaver(rm_save_dir=...); presumably wipes an existing
# checkpoint directory when True — TODO confirm in train.py.
RM_CHECKPOINTS_DIR = False

# Device used by the inference cell; training devices are managed by Accelerator.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# Training batches: reshuffled every epoch; the incomplete last batch is dropped
# so every optimisation step sees exactly BATCH_SIZE samples.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    shuffle=True,
    drop_last=True,
)
# Validation batches: fixed order and no dropped samples, so the metric is
# computed over the whole split and is comparable between epochs.
val_dataloader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    shuffle=False,
    drop_last=False,
)

# Number of classes shared by the model head and the metric.
NUM_CLASSES = 21

accelerator = Accelerator(mixed_precision="no", cpu=False)

model = UNet(classes_num=NUM_CLASSES, backbone_name=BACKBONE_NAME)
print(count_model_params(model))

# MulticlassDiceLoss() is the alternative loss available in train.py.
loss_fn = MulticlassCrossEntropyLoss()

optimizer = torch.optim.AdamW(
    model.parameters(),
    weight_decay=WEIGHT_DECAY,
    lr=LEARNING_RATE,
)
# Multiply the learning rate by 0.85 every 10 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer=optimizer,
    gamma=0.85,
    step_size=10,
)

# NOTE(review): ignore_index=0 presumably excludes the background class — confirm in train.py.
metric_fn = IoUMetric(reduction="micro", ignore_index=0, classes_num=NUM_CLASSES)

# The checkpoint directory must exist before the saver is created.
os.makedirs(CHECKPOINTS_DIR, exist_ok=True)
_checkpointer_kwargs = dict(
    accelerator=accelerator,
    model=model,
    metric_name="IoU",
    save_dir=CHECKPOINTS_DIR,
    rm_save_dir=RM_CHECKPOINTS_DIR,
    max_history=5,
    should_minimize=False,  # IoU: higher is better
)
checkpointer = CheckpointSaver(**_checkpointer_kwargs)

In [None]:
# Install TensorBoard first if it is missing:
# !pip install tensorboard
# Alternative kept from the original cell (presumably disables logging — confirm in train.py):
# tensorboard_logger = None

# Create the log directory, then open a writer on it.
os.makedirs(TENSORBOARD_DIR, exist_ok=True)
tensorboard_logger = SummaryWriter(log_dir=TENSORBOARD_DIR)

In [None]:
# Hand the training objects over to Accelerate; prepare() returns them in the
# same order in which they were passed.
(
    model,
    optimizer,
    train_dataloader,
    val_dataloader,
    lr_scheduler,
) = accelerator.prepare(
    model,
    optimizer,
    train_dataloader,
    val_dataloader,
    lr_scheduler,
)

In [None]:
# Run the training loop (see train.py).
train(
    model=model,
    optimizer=optimizer,
    train_dataloader=train_dataloader,
    # Validate on the held-out split: with save_on_val=True the checkpointer
    # ranks checkpoints by the validation metric, so passing the training
    # loader here would select the "best" model on training data.
    val_dataloader=val_dataloader,
    loss_function=loss_fn,
    metric_function=metric_fn,
    lr_scheduler=lr_scheduler,
    accelerator=accelerator,
    epoch_num=EPOCH_NUM,
    checkpointer=checkpointer,
    tb_logger=tensorboard_logger,
    save_on_val=True,
    show_every_x_batch=20,
)

## Загрузим и протестируем обученную модель

Предобученный чекпоинт: https://disk.yandex.ru/d/C6dRX7Un1L7qsw

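Поместить скачанный файл в каталог `checkpoints/` под именем `model_checkpoint_best.pt` — именно этот путь загружается в ячейке ниже.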

In [None]:
# Build a fresh network, load the best saved weights into it and move it to DEVICE.
best_checkpoint_path = pjoin(CHECKPOINTS_DIR, "model_checkpoint_best.pt")
model = load_checkpoint(
    model=UNet(backbone_name=BACKBONE_NAME, classes_num=21),
    load_path=best_checkpoint_path,
).to(DEVICE)
# Switch to evaluation mode; as the last expression of the cell this also
# displays the model in the notebook.
model.eval()

In [None]:
# Show one sample: input image, ground-truth mask and predicted mask.
sample_idx = 42
image, target = train_dataset[sample_idx]
# Collapse the leading axis of the target to a per-pixel class index
# (assumes the dataset returns a one-hot (C, H, W) mask — TODO confirm in dataset.py).
target = torch.argmax(target, dim=0)

# Inference only: no autograd graph is needed for the forward pass.
with torch.no_grad():
    logits = model(image.unsqueeze(0).to(DEVICE)).squeeze(0)
preds = torch.argmax(logits, dim=0)

# The image was standardised by albumentations.Normalize() with its default
# mean/std, so casting it straight to uint8 produces garbage. Undo the
# normalisation and clip to [0, 1], the range imshow expects for float RGB.
norm_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
norm_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
image_hwc = image.numpy().transpose(1, 2, 0)
image_rgb = np.clip(image_hwc * norm_std + norm_mean, 0.0, 1.0)

fig, ax = plt.subplots(1, 3, figsize=(9, 18))
ax[0].imshow(image_rgb)
ax[1].imshow(target.numpy())
ax[2].imshow(preds.cpu().numpy());

## Разметка данных с помощью CVAT

Сайт: https://www.cvat.ai/

## Обзоры бэкбонов

- Обзор до ~2020: https://arxiv.org/pdf/2206.08016.pdf
- Свежий обзор последних новостей: https://arxiv.org/pdf/2310.19909.pdf