## Детекция клеток

Ваша задача: обучить YOLO для детекции дрожжевых клеток и микроструктур (см. [07_object_detection.ipynb](../workshops/07_object_detection.ipynb)). Всё необходимое для запуска обучения вы можете взять из ноутбука с практикой, доделать нужно будет самую малость:
- реализовать расчёт Mean Average Precision для всего валидационного сета
- попробовать привести regression loss к виду, который используется в Yolo9000 и YoloV3
- подобрать лучшие размеры якорных рамок с помощью кластеризации

Основная цель: любыми средствами добиться $mAP > 0.6$ на валидации.

Используйте класс `torchmetrics.detection.MeanAveragePrecision` для расчёта $mAP$.

Нас будет интересовать именно значение `map` в словаре со всеми метриками - это mean average precision, усреднённый по всем отсечкам intersection over union в диапазоне $[0.5, 0.95]$ (см. документацию к классу).

При решении можно пользоваться `lightning` или писать цикл обучения вручную. В последнем случае не забудьте вручную отправить модель и батчи на GPU, чтобы обучалось быстрее

### Задание 1 (3 балла). Цикл обучения с расчётом Mean Average Precision

Запустите обучение модели из практики на всём обучающем датасете, выведите значение $mAP$ на валидационном датасете после окончания обучения.

В этом задании добейтесь $mAP > 0.3$, если всё сделано правильно - для этого должно хватать 30-50 эпох.

In [1]:
from pathlib import Path

import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import torch
from matplotlib.figure import Figure
from torch import Tensor, nn
from torch.utils.data import DataLoader, Dataset
from torchmetrics.functional.detection import intersection_over_union
from torchvision.ops.boxes import box_convert

In [2]:

torch.manual_seed(42)
device = torch.device("cuda")

In [3]:
def process_yolo_preds(preds: Tensor, rescaled_anchors: Tensor) -> tuple[Tensor, Tensor, Tensor]:
    """
    Преобразование выходов модели в
    1. Логит наличия объекта (вероятность получается применением сигмоиды)
    2. Положение рамки относительно ячейки в формате cxcywh
    3. Логиты классов (вероятности получаются применением softmax)
    """
    rescaled_anchors = rescaled_anchors.view(1, len(rescaled_anchors), 1, 1, 2)
    box_predictions = preds[..., 1:5].clone()

    box_predictions[..., 0:2] = torch.sigmoid(box_predictions[..., 0:2])
    box_predictions[..., 2:] = torch.exp(box_predictions[..., 2:]) * rescaled_anchors

    scores = preds[..., 0:1]
    return scores, box_predictions, preds[..., 5:]

In [4]:
GRID_SIZE = 8
IMAGE_SIZE = 256
ANCHORS = [
    [48, 72],
    # [64, 64],
    # [72, 48],
]

rescaled_anchors = torch.tensor(ANCHORS).to(device=device) / IMAGE_SIZE * GRID_SIZE

In [5]:
class CNNBlock(nn.Module):
    def __init__(self, in_channels: int, out_channels: int, **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels)
        self.activation = nn.LeakyReLU(0.1)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return self.activation(x)

class TinyYOLO(nn.Module):
    def __init__(self, num_classes: int = 2, num_anchors: int = 1, in_channels: int = 1) -> None:
        super().__init__()
        self.num_classes = num_classes
        self.in_channels = in_channels
        self.num_anchors = num_anchors
        self.layers = nn.Sequential(
            CNNBlock(1, 16, kernel_size=3, stride=2, padding=1, dilation=2),
            CNNBlock(16, 32, kernel_size=3, stride=2, padding=1, dilation=2),
            CNNBlock(32, 64, kernel_size=3, stride=2, padding=1, dilation=2),
            CNNBlock(64, 128, kernel_size=3, stride=2, padding=1, groups=8),
            CNNBlock(128, 256, kernel_size=3, stride=1, padding=1, groups=8),
            CNNBlock(256, 256, kernel_size=3, stride=1, padding=1, groups=16),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(256, num_anchors * (num_classes + 5), kernel_size=1)
        )
    
    def forward(self, x: Tensor) -> Tensor:
        x = self.layers(x)
        B, _, W, H = x.shape
        x = x.view(B, self.num_anchors, self.num_classes + 5, W, H)  # B A F W H
        x = x.permute(0, 1, 3, 4, 2)  # B A W H F
        return x


model = TinyYOLO(in_channels=1, num_anchors=1, num_classes=2)
print(sum(p.numel() for p in model.parameters()))
model.forward(torch.randn(2, 1, 256, 256)).shape

109431


torch.Size([2, 1, 8, 8, 7])

In [6]:
def iou_wh(wh1: Tensor, wh2: Tensor) -> Tensor:
    # IoU based on width and height of bboxes

    # intersection
    intersection_area = torch.min(wh1[..., 0], wh2[..., 0]) * torch.min(wh1[..., 1], wh2[..., 1])

    # union
    box1_area = wh1[..., 0] * wh1[..., 1]
    box2_area = wh2[..., 0] * wh2[..., 1]
    union_area = box1_area + box2_area - intersection_area

    iou_score = intersection_area / union_area
    return iou_score


def boxes_to_cells(
    boxes: Tensor,
    classes: Tensor,
    rescaled_anchors: Tensor,
    grid_size: int = 8,
    ignore_iou_thresh: float = 0.5,
) -> Tensor:
    """
    Переводит bbox представление в клеточное представление, где каждая рамка -
    (id класса, вероятность нахождения объекта, cx, cy, w, h), а клеточное представление
    имеет размер (batch_size, n_anchors, grid_size, grid_size, 6), в последней размерности
    хранятся признаки ячейки: класс объекта, вероятность объекта, координаты и размеры рамки
    относительно ячейки

    Args:
        boxes (Tensor): тензор со всеми рамками
        classes (Tensor): тензор с id классов объектов
        rescaled_anchors (Tensor): тензор размера (n_anchors, 2) с размерами якорей в долях от размеров ячейки
        grid_size (int): размер сетки,
        ignore_iou_thresh (float, optional): значение IoU для рамок, при котором ячейка,
            занятая более чем одним объектом, будет специально помечена для игнорирования
    """
    targets = torch.zeros((len(rescaled_anchors), grid_size, grid_size, 6))

    # Каждой рамке сопоставляем клетку и наиболее подходящий якорь
    for box, class_label in zip(boxes, classes):
        iou_anchors = iou_wh(box[2:4], rescaled_anchors / grid_size)
        anchor_indices = iou_anchors.argsort(descending=True, dim=0)
        x, y, width, height = box

        # Относим рамку к наиболее подходящему якорю
        has_anchor = False
        for anchor_idx in anchor_indices:
            s = grid_size

            # Определяем клетку, к которой относится рамка
            i, j = int(s * y), int(s * x)
            anchor_taken = targets[anchor_idx, i, j, 0]

            # Проверяем, доступен ли якорная рамка для текущей ячейки
            if not anchor_taken and not has_anchor:
                # Пересчитываем координаты по отношению к клетке
                x_cell, y_cell = s * x - j, s * y - i
                width_cell, height_cell = (width * s, height * s)
                box_coordinates = torch.tensor([x_cell, y_cell, width_cell, height_cell])

                # Заполняем содержимое для выбранной клетки
                targets[anchor_idx, i, j, 0] = 1  # указатель, что в клетке есть объект
                targets[anchor_idx, i, j, 1:5] = box_coordinates
                targets[anchor_idx, i, j, 5] = int(class_label)

                has_anchor = True

            # Если якорь уже выбран, проверим IoU, если больше threshold - пометим клетку -1
            elif not anchor_taken and iou_anchors[anchor_idx] > ignore_iou_thresh:
                targets[anchor_idx, i, j, 0] = -1

    return targets

In [7]:
class YeastDetectionDataset(Dataset):
    def __init__(
        self, subset_dir: Path, anchors: list[tuple[int, int]], image_size: int, grid_size: int = 8
    ) -> None:
        super().__init__()
        self.subset_dir = subset_dir
        self.items = list((self.subset_dir / "inputs").glob("*.pt"))
        # Ignore IoU threshold
        self.ignore_iou_thresh = 0.5
        self.rescaled_anchors = torch.tensor(anchors) / image_size * grid_size
        self.grid_size = grid_size
        self.image_size = image_size

    def __len__(self) -> int:
        return len(self.items)

    def __getitem__(self, index: int) -> tuple[Tensor, Tensor]:
        image_path = self.items[index]
        # load everything
        image = torch.load(image_path, weights_only=True).unsqueeze(0)
        classes = (
            torch.load(self.subset_dir / "classes" / image_path.parts[-1], weights_only=True) + 1
        )
        boxes = torch.load(
            self.subset_dir / "bounding_boxes" / image_path.parts[-1], weights_only=True
        )
        boxes = box_convert(boxes, "xyxy", "cxcywh") / self.image_size

        # convert boxes to cells
        targets = boxes_to_cells(
            boxes, classes, self.rescaled_anchors, self.grid_size, self.ignore_iou_thresh
        )
        return image, targets

In [8]:
train_dataset = YeastDetectionDataset(
    Path("yeast_cell_in_microstructures_dataset/train"), anchors=ANCHORS, image_size=256
)
val_dataset = YeastDetectionDataset(Path("yeast_cell_in_microstructures_dataset/val"), anchors=ANCHORS, image_size=256)

batch_size = 32
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

In [9]:
def cells_to_bboxes(cells: Tensor, rescaled_anchors: Tensor, is_predictions=False) -> Tensor:
    """
    Переводит клеточное представление в bbox представление, где каждая рамка -
    (id класса, вероятность нахождения объекта, cx, cy, w, h), а клеточное представление
    имеет размер (batch_size, n_anchors, grid_size, grid_size, 6), в последней размерности
    хранятся признаки ячейки: класс объекта, вероятность объекта, координаты и размеры рамки
    относительно ячейки

    Args:
        cells (Tensor): тензор размера (batch_size, n_anchors, width, height, 6)
        rescaled_anchors (Tensor): тензор размера (n_anchors, 2) с размерами якорей в долях от размеров ячейки
        is_predictions (bool, optional): являются ли входные ячейки предсказаниями или верной аннотацией.
    """

    if is_predictions:
        scores, box_predictions, logits = process_yolo_preds(cells, rescaled_anchors)
        scores = torch.sigmoid(scores)
        best_class = torch.argmax(logits, dim=-1).unsqueeze(-1) + 1

    else:
        box_predictions = cells[..., 1:5].clone()
        scores = cells[..., 0:1]
        best_class = cells[..., 5:6]

    # масштабируем размер рамок [0, grid_size] -> [0, 1]
    _, _, H, W, _ = cells.shape
    range_y, range_x = torch.meshgrid(
        torch.arange(H, dtype=cells.dtype, device=cells.device),
        torch.arange(W, dtype=cells.dtype, device=cells.device),
        indexing="ij",
    )
    x = torch.cat(
        [
            best_class,
            scores,
            (box_predictions[..., 0:1] + range_x[None, None, :, :, None]) / W,  # X center
            (box_predictions[..., 1:2] + range_y[None, None, :, :, None]) / H,  # Y center
            box_predictions[..., 2:3] / W,  # Width
            box_predictions[..., 3:4] / H,  # Height
        ],
        -1,
    )

    return x.view(-1, 6)

In [10]:
def plot_image(image: Tensor, boxes: Tensor, class_labels: list[str]) -> Figure:
    # назначим цвета для классов
    colour_map = plt.get_cmap("tab20b")
    colors = [colour_map(i) for i in np.linspace(0, 1, len(class_labels))]

    fig, ax = plt.subplots(1)
    ax.imshow(image, cmap="gray")
    h, w = image.shape
    for box in boxes:
        # добавляем прямоугольник
        class_pred = box[0] - 1
        box = box[2:]
        upper_left_x = box[0] - box[2] / 2
        upper_left_y = box[1] - box[3] / 2

        rect = patches.Rectangle(
            (upper_left_x * w, upper_left_y * h),
            box[2] * w,
            box[3] * h,
            linewidth=2,
            edgecolor=colors[int(class_pred)],
            facecolor="none",
        )
        ax.add_patch(rect)

        # добавляем подпись
        ax.text(
            upper_left_x * w,
            upper_left_y * h,
            s=class_labels[int(class_pred)],
            color="white",
            verticalalignment="top",
            bbox={"color": colors[int(class_pred)], "pad": 0},
        )

    return fig

In [11]:
class YOLOLoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
        self.bce = nn.BCEWithLogitsLoss()
        self.cross_entropy = nn.CrossEntropyLoss()

    def forward(self, pred: Tensor, target: Tensor, anchors: Tensor) -> Tensor:
        # ниже входные тензоры будут меняться на месте, так что склонируем их
        pred = pred.clone()
        target = target.clone()

        # разделяем рамки на содержащие объекты и не содержащие
        # NB: ещё могут быть -1, куда отнеслось более 1 объекта - их не учитываем
        obj = target[..., 0] == 1
        no_obj = target[..., 0] == 0

        # преобразуем предсказания bbox
        scores, pred_boxes, logits = process_yolo_preds(pred, anchors)

        # no object loss: кросс-энтропия вместо MSE
        no_object_loss = self.bce(
            (scores[no_obj]),
            (target[..., 0:1][no_obj]),
        )

        # object loss: учим предсказывать IoU
        ious = intersection_over_union(pred_boxes[obj], target[..., 1:5][obj]).detach()
        object_loss = self.mse(scores[obj].sigmoid(), ious * target[..., 0:1][obj])

        # box coordinate loss: логарифмируем размеры рамок перед расчётом MSE
        anchors = anchors.reshape(1, len(anchors), 1, 1, 2)
        pred[..., 1:3] = pred[..., 1:3].sigmoid()
        target[..., 3:5] = torch.log(1e-6 + target[..., 3:5] / anchors)
        box_loss = self.mse(pred[..., 1:5][obj], target[..., 1:5][obj])

        # class loss: здесь всё обычно
        class_loss = self.cross_entropy(logits[obj], target[..., 5][obj].long() - 1)

        # Total loss
        return box_loss + object_loss + no_object_loss + class_loss

rescaled_anchors = rescaled_anchors.to(device)  

torch.manual_seed(42)
x, y = next(iter(train_loader))
device = torch.device("cuda")
x, y = x.to(device), y.to(device)

model = TinyYOLO(in_channels=1, num_classes=2, num_anchors=len(rescaled_anchors)).to(device)
print(sum(p.numel() for p in model.parameters()))

preds = model(x)

loss_fn = YOLOLoss().to(device)

# Убеждаемся, что anchors тоже на CUDA
rescaled_anchors = rescaled_anchors.to(device)

# Теперь вызываем функцию потерь
loss = loss_fn(preds, y, rescaled_anchors)
print(loss.item())


109431
1.695744276046753


In [12]:
from typing import Callable, Type, Any
import lightning as L

from lightning.pytorch.utilities.types import EVAL_DATALOADERS, TRAIN_DATALOADERS
from PIL.Image import Image
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

_collate_fn_t = Callable[[list[tuple[Tensor, Any]]], Any]

class Datamodule(L.LightningDataModule):
    def __init__(
        self,
        datadir: Path,
        dataset_class: Type[Dataset],
        batch_size: int,
        train_subdir: str = "train",
        val_subdir: str = "val",
        test_subdir: str = "test",
    ) -> None:
        super().__init__()
        self.batch_size = batch_size
        self.dataset_class = dataset_class
        self.train_dir = datadir / train_subdir
        self.val_dir = datadir / val_subdir
        self.test_dir = datadir / test_subdir

    @property
    def collate_fn(self) -> _collate_fn_t | None:
        if self.dataset_class == YeastDetectionDataset:
            return None
        else:
            return lambda batch: tuple(zip(*batch))

    def setup(self, stage: str) -> None:
        if stage in ("fit", "validate"):
            self.val_dataset = self.dataset_class(self.val_dir, anchors=ANCHORS, image_size=256)
        if stage == "fit":
            self.train_dataset = self.dataset_class(self.train_dir, anchors=ANCHORS, image_size=256)
        elif stage == "test":
            self.test_dataset = self.dataset_class(self.test_dir, anchors=ANCHORS, image_size=256)

    def train_dataloader(self) -> TRAIN_DATALOADERS:
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            collate_fn=self.collate_fn,
        )

    def val_dataloader(self) -> EVAL_DATALOADERS:
        return DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            collate_fn=self.collate_fn,
        )

    def test_dataloader(self) -> EVAL_DATALOADERS:
        return DataLoader(
            self.test_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            collate_fn=self.collate_fn,
        )

In [13]:
from typing import Any
from torchvision.ops.boxes import nms
from lightning.pytorch.utilities.types import STEP_OUTPUT
import torchmetrics.classification
from torchmetrics.detection import MeanAveragePrecision


class Lit(L.LightningModule):
    def __init__(self, model: nn.Module, learning_rate: float) -> None:
        super().__init__()
        self.save_hyperparameters()
        self.model = model
        self.learning_rate = learning_rate
        self.loss_fn = YOLOLoss()

    def training_step(
        self, batch: tuple[Tensor, Tensor], batch_idx: int
    ) -> STEP_OUTPUT:
        x, y = batch
        y_hat = self.model(x)
        loss = self.loss_fn.forward(y_hat, y, rescaled_anchors)
        self.log("train_loss", loss, on_epoch=True, on_step=False)
        # обновляем метрики и логируем раз в эпоху

        return loss

    def validation_step(
        self, batch: tuple[Tensor, Tensor], batch_idx: int
    ) -> STEP_OUTPUT | None:
        x, y = batch
        y_hat = self.model(x)
        loss = self.loss_fn.forward(y_hat, y, rescaled_anchors)
        self.log("val_loss", loss, on_epoch=True, on_step=False)
        # обновляем метрики и логируем раз в эпоху
        return {
            "loss": loss,
            "preds": y_hat,
        }
    
    def on_validation_epoch_end(self) -> None:
        val_loss = self.trainer.callback_metrics["val_loss"]
        print(f" Val. Loss: {val_loss:.4f}")
    

    def configure_optimizers(self) -> dict[str, Any]:
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.learning_rate)
        # давайте кроме оптимизатора создадим ещё расписание для шага оптимизации
        return {
            "optimizer": optimizer,
            "lr_scheduler": torch.optim.lr_scheduler.MultiStepLR(
                optimizer, milestones=[5, 10, 15]
            ),
        }

In [14]:
from lightning.pytorch.loggers import TensorBoardLogger

datamodule = Datamodule(
    datadir=Path("yeast_cell_in_microstructures_dataset"),
    dataset_class=YeastDetectionDataset,
    batch_size=8,
)

datamodule.setup("fit")

trainer = L.Trainer(
    accelerator="auto",
    max_epochs=40,
    limit_train_batches=None,
    limit_val_batches=None,
    log_every_n_steps=10
)
lit_module = Lit(
    model=TinyYOLO(in_channels=1, num_anchors=len(rescaled_anchors), num_classes=2),
    learning_rate=1e-3,
)
trainer.fit(
    model=lit_module,
    datamodule=datamodule,
)

trainer.validate(model=lit_module, dataloaders=datamodule.val_dataloader())

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/ne_ravilka/miniconda3/envs/dl-mcs/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:209: Attribute 'model' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['model'])`.
You are using a CUDA device ('NVIDIA GeForce RTX 3060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type     | Params | Mode 
---------------------------------------------
0 | model   | TinyYOLO | 109 K  | train
1 | loss_fn | YOLOLoss | 0      | train
----------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/ne_ravilka/miniconda3/envs/dl-mcs/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


 Val. Loss: 1.4646


/home/ne_ravilka/miniconda3/envs/dl-mcs/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.2550


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1614


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1368


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1328


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1107


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1174


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1173


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1167


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1207


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1197


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1217


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1221


Validation: |          | 0/? [-1:59:58<?, ?it/s]

 Val. Loss: 0.1223


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1224


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1225


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1256


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1257


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1225


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1236


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1249


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1221


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1237


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1267


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1232


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1229


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1232


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1238


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1242


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1250


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1267


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1209


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1226


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1250


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1236


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1254


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1234


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1217


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1230


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1224


Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=40` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


 Val. Loss: 0.1237


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1237
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        val_loss            0.12369412928819656
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'val_loss': 0.12369412928819656}]

In [15]:
from pprint import pprint
from torchmetrics.detection import MeanAveragePrecision 

val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

metric = MeanAveragePrecision(iou_type="bbox", box_format="cxcywh", class_metrics=True).to(device)

for x, y in val_loader:
    predicted_boxes = []
    target_boxes = []

    preds = lit_module.model.to(device)(x.to(device))

    for i in range(len(y)):
        pred_boxes = cells_to_bboxes(preds[i:i+1], rescaled_anchors.to(device), is_predictions=True)
        pred_boxes = pred_boxes.to(device)  

        
        pred_boxes = pred_boxes[nms(
            box_convert(pred_boxes[:, 2:], "cxcywh", "xyxy"),
            pred_boxes[:, 1],
            iou_threshold=0.3
        )]

        predicted_boxes.append(
            dict(
                boxes=pred_boxes[:, 2:].to(device),  
                scores=pred_boxes[:, 1].to(device),  
                labels=pred_boxes[:, 0].long().to(device),  
            )
        )

        true_boxes = cells_to_bboxes(y[i:i+1].to(device), rescaled_anchors.to(device))
        true_boxes = true_boxes[true_boxes[:, 1] == 1]

        target_boxes.append(
            dict(
                boxes=true_boxes[:, 2:].to(device), 
                labels=true_boxes[:, 0].long().to(device),  
            )
        )

    metric.update(predicted_boxes, target_boxes)

print(metric.compute())


{'map': tensor(0.3783), 'map_50': tensor(0.8432), 'map_75': tensor(0.2950), 'map_small': tensor(0.3783), 'map_medium': tensor(-1.), 'map_large': tensor(-1.), 'mar_1': tensor(0.2395), 'mar_10': tensor(0.4675), 'mar_100': tensor(0.4675), 'mar_small': tensor(0.4675), 'mar_medium': tensor(-1.), 'mar_large': tensor(-1.), 'map_per_class': tensor([0.4923, 0.2643]), 'mar_100_per_class': tensor([0.5786, 0.3565]), 'classes': tensor([1, 2], dtype=torch.int32)}


### Задание 2 (2 балла). YoloV3 loss

Мы упоминали, что на практике использовалась не совсем та же самая ошибка, что и в YOLO. В этом задании исправьте в классе YoloLoss ошибку регрессии, приведя её в соответствие с тем, как она описана в статье [YOLOv3: An Incremental Improvement](https://arxiv.org/abs/1804.02767) (см. раздел 2.1. Bounding Box Prediction).

Запустите обучение с изменённой ошибкой, добейтесь $mAP > 0.3$

In [16]:
class YOLOLoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
        self.bce = nn.BCEWithLogitsLoss()
        self.ce = nn.CrossEntropyLoss()

    def forward(self, pred: Tensor, target: Tensor, anchors: Tensor) -> Tensor:
        # ниже входные тензоры будут меняться на месте, так что склонируем их
        pred = pred.clone()
        target = target.clone()

        # разделяем рамки на содержащие объекты и не содержащие
        # NB: ещё могут быть -1, куда отнеслось более 1 объекта - их не учитываем
        obj = target[..., 0] == 1
        no_obj = target[..., 0] == 0

        # преобразуем предсказания bbox
        scores, pred_boxes, logits = process_yolo_preds(pred, anchors)

        # no object loss: кросс-энтропия вместо MSE
        no_object_loss = self.bce(
            (scores[no_obj]),
            (target[..., 0:1][no_obj]),
        )

        # object loss: учим предсказывать IoU
        ious = intersection_over_union(pred_boxes[obj], target[..., 1:5][obj]).detach()
        object_loss = self.mse(scores[obj].sigmoid(), ious * target[..., 0:1][obj])

        # box coordinate loss: логарифмируем размеры рамок перед расчётом MSE
        anchors = anchors.reshape(1, len(anchors), 1, 1, 2)
        pred[..., 1:3] = pred[..., 1:3].sigmoid()
        pred[..., 3:5] = torch.sqrt(anchors * torch.exp(pred[..., 3:5]))
        target[..., 3:5] = torch.sqrt(target[..., 3:5])
        box_loss = self.mse(pred[..., 1:5][obj], target[..., 1:5][obj])

        # class loss: здесь всё обычно
        class_loss = self.ce(logits[obj], target[..., 5][obj].long() - 1)

        # Total loss
        return box_loss + object_loss + no_object_loss + class_loss

In [17]:
datamodule = Datamodule(
    datadir=Path("yeast_cell_in_microstructures_dataset"),
    dataset_class=YeastDetectionDataset,
    batch_size=8,
)

datamodule.setup("fit")

trainer = L.Trainer(
    accelerator="auto",
    max_epochs=40,
    limit_train_batches=None,
    limit_val_batches=None,
    log_every_n_steps=10
)
lit_module = Lit(
    model=TinyYOLO(in_channels=1, num_anchors=len(rescaled_anchors), num_classes=2),
    learning_rate=0.005,
)
trainer.fit(
    model=lit_module,
    datamodule=datamodule,
)

trainer.validate(model=lit_module, dataloaders=datamodule.val_dataloader())

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type     | Params | Mode 
---------------------------------------------
0 | model   | TinyYOLO | 109 K  | train
1 | loss_fn | YOLOLoss | 0      | train
---------------------------------------------
109 K     Trainable params
0         Non-trainable params
109 K     Total params
0.438     Total estimated model params size (MB)
32        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 1.4873


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.2633


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.4092


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.4046


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1065


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0886


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0875


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0846


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0818


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0814


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0790


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0782


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0779


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0774


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0775


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0774


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0774


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0777


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0770


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0770


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0773


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0771


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0774


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0771


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0771


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0771


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0770


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0769


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0776


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0776


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0772


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0769


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0770


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0771


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0766


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0767


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0774


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0770


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0771


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0766


Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=40` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


 Val. Loss: 0.0765


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0765
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        val_loss            0.07654141634702682
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'val_loss': 0.07654141634702682}]

In [18]:
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

metric = MeanAveragePrecision(iou_type="bbox", box_format="cxcywh", class_metrics=True).to(device)

for x, y in val_loader:
    predicted_boxes = []
    target_boxes = []

    preds = lit_module.model.to(device)(x.to(device))

    # итерируемся по элементам батча, собирая пердсказанные и верные рамки
    for i in range(len(y)):
        # получаем предсказанные рамки
        pred_boxes = cells_to_bboxes(preds[i:i+1], rescaled_anchors.to(device), is_predictions=True)
        # ВАЖНО: делаем non-max suppression ДО расчёта метрик
        pred_boxes = pred_boxes[nms(box_convert(pred_boxes[:, 2:], "cxcywh", "xyxy"), pred_boxes[:, 1], iou_threshold=0.3)]
        predicted_boxes.append(
            dict(
                boxes=pred_boxes[:, 2:],
                scores=pred_boxes[:, 1],
                labels=pred_boxes[:, 0].long(),
            )
        )
        # достаём правильные рамки
        true_boxes = cells_to_bboxes(y[i:i+1], rescaled_anchors)
        true_boxes = true_boxes[true_boxes[:, 1] == 1]

        target_boxes.append(
            dict(
                boxes=true_boxes[:, 2:],
                labels=true_boxes[:, 0].long(),
            )
        )
    metric.update(predicted_boxes, target_boxes)
    
metric = metric.to(device)
print(metric.compute())

{'map': tensor(0.5705), 'map_50': tensor(0.9297), 'map_75': tensor(0.6186), 'map_small': tensor(0.5705), 'map_medium': tensor(-1.), 'map_large': tensor(-1.), 'mar_1': tensor(0.3159), 'mar_10': tensor(0.6283), 'mar_100': tensor(0.6297), 'mar_small': tensor(0.6297), 'mar_medium': tensor(-1.), 'mar_large': tensor(-1.), 'map_per_class': tensor([0.7433, 0.3978]), 'mar_100_per_class': tensor([0.7918, 0.4676]), 'classes': tensor([1, 2], dtype=torch.int32)}


### Задание 3 (3 балла). Выбор anchors с помощью кластеризации

В статье [YOLO9000: Better, Faster, Stronger](https://arxiv.org/abs/1612.08242) в разделе 2. Better. Dimension clusters описан способ выбора anchor boxes через кластеризацию обучающего датасета.

Проделайте то же самое с вашим обучающим датасетом, чтобы выбрать несколько anchor boxes.

В качестве результата выведите получившиеся размеры anchors для # Clusters = 5

In [19]:
from pyclustering.utils.metric import type_metric, distance_metric
from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
from pyclustering.cluster.kmeans import kmeans
import warnings
np.warnings = warnings

class YeastDetectionDataset(Dataset):
    def __init__(
        self, subset_dir: Path, image_size: int, grid_size: int = GRID_SIZE
    ) -> None:
        super().__init__()
        self.subset_dir = subset_dir
        self.items = list((self.subset_dir / "inputs").glob("*.pt"))
        # Ignore IoU threshold
        self.ignore_iou_thresh = 0.5
        self.grid_size = grid_size
        self.image_size = image_size

    def __len__(self) -> int:
        return len(self.items)

    def __getitem__(self, index: int) -> Tensor:
        image_path = self.items[index]
        boxes = torch.load(
            self.subset_dir / "bounding_boxes" / image_path.parts[-1], weights_only=True
        )
        boxes = box_convert(boxes, "xyxy", "cxcywh")
        return boxes

train_dataset = YeastDetectionDataset(
    Path("yeast_cell_in_microstructures_dataset/train"), image_size=256
)

our_p_w_p_h = list()
for i in range(len(train_dataset)):
  for j in range(len(train_dataset[i])):
    our_p_w_p_h.append([train_dataset[i][j][2], train_dataset[i][j][3]])


def metric(x, y):
  return 1 - (min(x[0], y[0]) * min(x[1], y[1]) / (x[0] * x[1] + y[0] * y[1] - (min(x[0], y[0]) * min(x[1], y[1]))))

manhattan_metric = distance_metric(type_metric.USER_DEFINED, func=metric)
initial_centers = kmeans_plusplus_initializer(our_p_w_p_h, 5).initialize()

kmeans_instance = kmeans(our_p_w_p_h, initial_centers, metric=manhattan_metric)

kmeans_instance.process()
clusters = kmeans_instance.get_clusters()
kmeans_instance.process()
clusters = kmeans_instance.get_clusters()

final_centers = kmeans_instance.get_centers()

ANCHORS = final_centers

rescaled_anchors = torch.tensor(ANCHORS).to(device=device) / IMAGE_SIZE * GRID_SIZE
ANCHORS

[[61.81124496459961, 62.1164665222168],
 [43.63218307495117, 48.04597854614258],
 [70.28755187988281, 105.5622329711914],
 [57.12345504760742, 84.88888549804688],
 [91.29850769042969, 88.31343078613281]]

### Задание 4 (4 балла + бонусы за лучшую точность). Обучите модель


Ваша цель: $mAP > 0.6$ на валидации.

Можете использовать весь арсенал:
- использование множества якорных рамок (сформированных вручную или в результате кластеризации)
- любые изменения функции ошибки
- любые изменения архитектуры модели и регуляризация
- аугментации (вспоминаем `torchvision.transforms` и `albumentations`)
- любая длительность обучения, оптимизатор, расписание для learning rate

Бонусы за повышенную точность:
- **5 баллов**: $mAP > 0.65$
- **1 балл** за каждые следующие $0.01$ (т. е. за $mAP > 0.72$ в этом задании вы получите $4 + 12 = 16$ баллов)

**Важно**: перез запуском обучения зафиксируйте `torch.manual_seed()`

In [20]:
class YeastDetectionDataset(Dataset):
    def __init__(
        self, subset_dir: Path, anchors: list[tuple[int, int]], image_size: int, grid_size: int = GRID_SIZE
    ) -> None:
        super().__init__()
        self.subset_dir = subset_dir
        self.items = list((self.subset_dir / "inputs").glob("*.pt"))
        # Ignore IoU threshold
        self.ignore_iou_thresh = 0.5
        self.rescaled_anchors = torch.tensor(anchors) / image_size * grid_size
        self.grid_size = grid_size
        self.image_size = image_size

    def __len__(self) -> int:
        return len(self.items)

    def __getitem__(self, index: int) -> tuple[Tensor, Tensor]:
        image_path = self.items[index]
        # load everything
        image = torch.load(image_path, weights_only=True).unsqueeze(0)
        classes = (
            torch.load(self.subset_dir / "classes" / image_path.parts[-1], weights_only=True) + 1
        )
        boxes = torch.load(
            self.subset_dir / "bounding_boxes" / image_path.parts[-1], weights_only=True
        )
        boxes = box_convert(boxes, "xyxy", "cxcywh") / self.image_size

        # convert boxes to cells
        targets = boxes_to_cells(
            boxes, classes, self.rescaled_anchors, self.grid_size, self.ignore_iou_thresh
        )
        return image.to(device), targets.to(device)

In [22]:
class YOLOLoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
        self.bce = nn.BCEWithLogitsLoss()
        self.cross_entropy = nn.CrossEntropyLoss()

    def forward(self, pred: Tensor, target: Tensor, anchors: Tensor) -> Tensor:
        # ниже входные тензоры будут меняться на месте, так что склонируем их
        pred = pred.clone()
        target = target.clone()

        # разделяем рамки на содержащие объекты и не содержащие
        # NB: ещё могут быть -1, куда отнеслось более 1 объекта - их не учитываем
        obj = target[..., 0] == 1
        no_obj = target[..., 0] == 0

        # преобразуем предсказания bbox
        scores, pred_boxes, logits = process_yolo_preds(pred, anchors)

        # no object loss: кросс-энтропия вместо MSE
        no_object_loss = self.bce(
            (scores[no_obj]),
            (target[..., 0:1][no_obj]),
        )

        # object loss: учим предсказывать IoU
        ious = intersection_over_union(pred_boxes[obj], target[..., 1:5][obj]).detach()
        object_loss = self.mse(scores[obj].sigmoid(), ious * target[..., 0:1][obj])

        # box coordinate loss: логарифмируем размеры рамок перед расчётом MSE
        anchors = anchors.reshape(1, len(anchors), 1, 1, 2)
        pred[..., 1:3] = pred[..., 1:3].sigmoid()
        target[..., 3:5] = torch.log(1e-6 + target[..., 3:5] / anchors)
        box_loss = self.mse(pred[..., 1:5][obj], target[..., 1:5][obj])

        # class loss: здесь всё обычно
        class_loss = self.cross_entropy(logits[obj], target[..., 5][obj].long() - 1)

        # Total loss
        return box_loss + object_loss + no_object_loss + class_loss

In [23]:
class Lit(L.LightningModule):
    def __init__(self, model: nn.Module, learning_rate: float) -> None:
        super().__init__()
        self.save_hyperparameters()
        self.model = model
        self.learning_rate = learning_rate
        self.loss_fn = YOLOLoss()

    def training_step(
        self, batch: tuple[Tensor, Tensor], batch_idx: int
    ) -> STEP_OUTPUT:
        x, y = batch
        y_hat = self.model(x)
        loss = self.loss_fn.forward(y_hat, y, rescaled_anchors)
        self.log("train_loss", loss, on_epoch=True, on_step=False)
       

        return loss

    def validation_step(
        self, batch: tuple[Tensor, Tensor], batch_idx: int
    ) -> STEP_OUTPUT | None:
        x, y = batch
        y_hat = self.model(x)
        loss = self.loss_fn.forward(y_hat, y, rescaled_anchors)
        self.log("val_loss", loss, on_epoch=True, on_step=False)
        return {
            "loss": loss,
            "preds": y_hat,
        }
    
    def on_validation_epoch_end(self) -> None:
        val_loss = self.trainer.callback_metrics["val_loss"]
        print(f" Val. Loss: {val_loss:.4f}")
    

    def configure_optimizers(self) -> dict[str, Any]:
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate, weight_decay=1e-5)
        return {
            "optimizer": optimizer,
            "lr_scheduler": torch.optim.lr_scheduler.MultiStepLR(
                optimizer, milestones=[5, 10, 15]
            ),
        }

In [24]:
class CNNBlock(nn.Module):
    def __init__(self, in_channels: int, out_channels: int, **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels)
        self.activation = nn.LogSigmoid()

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return self.activation(x)

class TinyYOLO(nn.Module):
    def __init__(self, num_classes: int = 2, num_anchors: int = 1, in_channels: int = 1) -> None:
        super().__init__()
        self.num_classes = num_classes
        self.in_channels = in_channels
        self.num_anchors = num_anchors
        self.layers = nn.Sequential(
            CNNBlock(1, 16, kernel_size=3, stride=2, padding=1, dilation=2),
            CNNBlock(16, 32, kernel_size=3, stride=2, padding=1, dilation=2),
            CNNBlock(32, 64, kernel_size=3, stride=2, padding=1, dilation=2),
            CNNBlock(64, 128, kernel_size=3, stride=2, padding=1, groups=8),
            CNNBlock(128, 256, kernel_size=3, stride=1, padding=1, groups=8),
            CNNBlock(256, 256, kernel_size=3, stride=1, padding=1, groups=16),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(256, num_anchors * (num_classes + 5), kernel_size=1)
        )
    
    def forward(self, x: Tensor) -> Tensor:
        x = self.layers(x)
        B, _, W, H = x.shape
        x = x.view(B, self.num_anchors, self.num_classes + 5, W, H)  # B A F W H
        x = x.permute(0, 1, 3, 4, 2)  # B A W H F
        return x


model = TinyYOLO(in_channels=1, num_anchors=1, num_classes=2)
print(sum(p.numel() for p in model.parameters()))
model.forward(torch.randn(2, 1, 256, 256)).shape

109431


torch.Size([2, 1, 8, 8, 7])

In [37]:
datamodule = Datamodule(
    datadir=Path("yeast_cell_in_microstructures_dataset"),
    dataset_class=YeastDetectionDataset,
    batch_size=6,
)

datamodule.setup("fit")

trainer = L.Trainer(
    accelerator="auto",
    max_epochs=30,
    limit_train_batches=None,
    limit_val_batches=None,
    log_every_n_steps=10,
)
lit_module = Lit(
    model=TinyYOLO(in_channels=1, num_anchors=len(rescaled_anchors), num_classes=2),
    learning_rate=0.005,
)
trainer.fit(
    model=lit_module,
    datamodule=datamodule,
)
trainer.validate(model=lit_module, datamodule=datamodule,)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type     | Params | Mode 
---------------------------------------------
0 | model   | TinyYOLO | 116 K  | train
1 | loss_fn | YOLOLoss | 0      | train
---------------------------------------------
116 K     Trainable params
0         Non-trainable params
116 K     Total params
0.467     Total estimated model params size (MB)
32        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 1.5890


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1848


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1853


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.1089


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0843


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0724


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0647


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0670


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0636


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0626


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0643


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0651


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0657


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0659


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0650


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0652


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0647


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0652


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0650


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0656


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0656


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0646


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0650


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0662


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0650


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0652


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0662


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0650


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0658


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0660


Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=30` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


 Val. Loss: 0.0655


Validation: |          | 0/? [00:00<?, ?it/s]

 Val. Loss: 0.0655
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        val_loss            0.06549600511789322
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'val_loss': 0.06549600511789322}]

In [38]:
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

metric = MeanAveragePrecision(iou_type="bbox", box_format="cxcywh", class_metrics=True).to(device)

for x, y in val_loader:
    predicted_boxes = []
    target_boxes = []

    preds = lit_module.model.to(device)(x.to(device))

    # итерируемся по элементам батча, собирая пердсказанные и верные рамки
    for i in range(len(y)):
        # получаем предсказанные рамки
        pred_boxes = cells_to_bboxes(preds[i:i+1], rescaled_anchors.to(device), is_predictions=True)
        # ВАЖНО: делаем non-max suppression ДО расчёта метрик
        pred_boxes = pred_boxes[nms(box_convert(pred_boxes[:, 2:], "cxcywh", "xyxy"), pred_boxes[:, 1], iou_threshold=0.3)]
        predicted_boxes.append(
            dict(
                boxes=pred_boxes[:, 2:],
                scores=pred_boxes[:, 1],
                labels=pred_boxes[:, 0].long(),
            )
        )
        # достаём правильные рамки
        true_boxes = cells_to_bboxes(y[i:i+1], rescaled_anchors)
        true_boxes = true_boxes[true_boxes[:, 1] == 1]

        target_boxes.append(
            dict(
                boxes=true_boxes[:, 2:].to(device),
                labels=true_boxes[:, 0].long().to(device),
            )
        )
    metric.update(predicted_boxes, target_boxes)

metric.compute()

{'map': tensor(0.6622),
 'map_50': tensor(0.9516),
 'map_75': tensor(0.7335),
 'map_small': tensor(0.6622),
 'map_medium': tensor(-1.),
 'map_large': tensor(-1.),
 'mar_1': tensor(0.3434),
 'mar_10': tensor(0.7170),
 'mar_100': tensor(0.7212),
 'mar_small': tensor(0.7212),
 'mar_medium': tensor(-1.),
 'mar_large': tensor(-1.),
 'map_per_class': tensor([0.8227, 0.5018]),
 'mar_100_per_class': tensor([0.8571, 0.5852]),
 'classes': tensor([1, 2], dtype=torch.int32)}