In [2]:
# Install the third-party packages for this notebook into the kernel's environment.
# NOTE(review): no versions are pinned, so a re-run may pull different releases;
# opencv-python is not imported by any cell visible here — confirm it is still needed.
%pip install torch torchvision pycocotools opencv-python matplotlib

Наиболее часто упоминаемая модель: CDDMSL (встречается 7 раз)


In [None]:
import torch
import torchvision
from torchvision.models.detection import MaskRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.datasets import CocoDetection
from torchvision.transforms import functional as F
from torch.utils.data import DataLoader
from torchvision.models.detection.mask_rcnn import maskrcnn_resnet50_fpn
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

In [None]:
class CustomCocoDataset(CocoDetection):
    """COCO dataset yielding ``(image, target)`` pairs for torchvision detection models.

    Each target is a dict with the keys ``boxes``, ``labels``, ``masks``,
    ``image_id``, ``area`` and ``iscrowd``.

    Args:
        root: Directory containing the images.
        annFile: Path to the COCO-format annotation JSON file.
        transforms: Optional callable applied to the image only (the target is
            not passed to it), so it must not change the image geometry.

    NOTE(review): ``category_id`` values are used as labels unchanged; this
    assumes they already lie in ``[1, num_classes - 1]`` — confirm against the
    annotation file.
    """

    def __init__(self, root, annFile, transforms=None):
        super(CustomCocoDataset, self).__init__(root, annFile)
        # Stored under its own name on purpose: the base class reserves
        # ``self.transforms`` for joint callables that it invokes as
        # ``transforms(image, target)`` inside its ``__getitem__``; an
        # image-only pipeline placed there fails with a TypeError.
        self.image_transforms = transforms

    def __getitem__(self, idx):
        """Return the image at ``idx`` and its target dict of tensors."""
        img, annotations = super(CustomCocoDataset, self).__getitem__(idx)
        width, height = img.size  # PIL reports (width, height)

        # Taken from the dataset index rather than the first annotation, so
        # images without any annotation do not raise IndexError.
        image_id = self.ids[idx]

        # Boxes with non-positive width/height are rejected by torchvision's
        # detection models during training, so such annotations are dropped.
        annotations = [
            obj for obj in annotations
            if obj["bbox"][2] > 0 and obj["bbox"][3] > 0
        ]

        if annotations:
            # COCO stores boxes as [x, y, width, height]; the detection models
            # expect corner format [x1, y1, x2, y2].
            xywh = torch.as_tensor(
                [obj["bbox"] for obj in annotations], dtype=torch.float32
            )
            top_left = xywh[:, :2]
            boxes = torch.cat([top_left, top_left + xywh[:, 2:]], dim=1)
            # Stack into a single ndarray first: converting a Python list of
            # arrays element by element is much slower.
            masks = torch.as_tensor(
                np.stack([self.coco.annToMask(obj) for obj in annotations]),
                dtype=torch.uint8,
            )
        else:
            # Keep well-formed empty tensors so background-only images work.
            boxes = torch.zeros((0, 4), dtype=torch.float32)
            masks = torch.zeros((0, height, width), dtype=torch.uint8)

        target = {
            "boxes": boxes,
            "labels": torch.as_tensor(
                [obj["category_id"] for obj in annotations], dtype=torch.int64
            ),
            "masks": masks,
            "image_id": torch.tensor([image_id]),
            "area": torch.as_tensor(
                [obj["area"] for obj in annotations], dtype=torch.float32
            ),
            "iscrowd": torch.as_tensor(
                [obj["iscrowd"] for obj in annotations], dtype=torch.int64
            ),
        }

        if self.image_transforms is not None:
            img = self.image_transforms(img)
        return img, target

In [None]:
def get_transform(train):
    """Build the image-only preprocessing pipeline.

    Args:
        train: Whether the pipeline is for training. Accepted for interface
            compatibility; training and evaluation currently share the same
            pipeline (see the note in the body).

    Returns:
        A ``torchvision.transforms.Compose`` that converts the input image to
        a tensor.
    """
    pipeline = [F.to_tensor]
    # No flip (or any other geometric augmentation) is applied here: this
    # pipeline only ever receives the image, so flipping it would leave the
    # boxes and masks pointing at the un-flipped locations. The previous
    # ``F.hflip`` entry also flipped every training image unconditionally
    # rather than at random. Geometric augmentation has to be done where the
    # image and its target can be transformed together.
    return torchvision.transforms.Compose(pipeline)

In [None]:
def _collate_detection_batch(batch):
    """Regroup a list of (image, target) samples into (images, targets) tuples."""
    return tuple(zip(*batch))


# Training split: images and annotations live under dataset/train.
train_dataset = CustomCocoDataset(
    root="dataset/train/images",
    annFile="dataset/train/annotations.json",
    transforms=get_transform(train=True),
)

# Validation split: same layout under dataset/val.
val_dataset = CustomCocoDataset(
    root="dataset/val/images",
    annFile="dataset/val/annotations.json",
    transforms=get_transform(train=False),
)

# Batches stay as tuples instead of stacked tensors; only the training loader shuffles.
train_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=_collate_detection_batch,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=4,
    shuffle=False,
    collate_fn=_collate_detection_batch,
)

In [None]:
def get_model(num_classes):
    """Create a pretrained Mask R-CNN whose prediction heads output ``num_classes`` classes.

    Args:
        num_classes: Number of output classes for the new box and mask heads.

    Returns:
        The model with its box predictor and mask predictor replaced.
    """
    # NOTE(review): recent torchvision releases deprecate ``pretrained=`` in
    # favour of ``weights=`` — confirm against the installed version.
    model = maskrcnn_resnet50_fpn(pretrained=True)
    heads = model.roi_heads

    # New box head: same input width as the existing classifier layer.
    box_in_features = heads.box_predictor.cls_score.in_features
    heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(
        box_in_features, num_classes
    )

    # New mask head: same input channels as the existing one, 256 hidden channels.
    mask_in_channels = heads.mask_predictor.conv5_mask.in_channels
    mask_hidden_channels = 256
    heads.mask_predictor = torchvision.models.detection.mask_rcnn.MaskRCNNPredictor(
        mask_in_channels, mask_hidden_channels, num_classes
    )

    return model

In [None]:
def _move_batch_to_device(images, targets, device):
    """Move every image and every tensor-valued target entry to ``device``."""
    images = [img.to(device) for img in images]
    targets = [
        # Non-tensor entries (if any) are passed through untouched.
        {k: (v.to(device) if isinstance(v, torch.Tensor) else v) for k, v in t.items()}
        for t in targets
    ]
    return images, targets


def _mean_validation_loss(model, val_loader, device):
    """Return the mean summed loss over ``val_loader``, or ``None`` if it yields no batches."""
    if val_loader is None:
        return None
    total, num_batches = 0.0, 0
    # The model is called exactly as in the training loop (training mode, with
    # targets) so that it returns a loss dict; gradients are disabled so that
    # no weights are updated.
    # NOTE(review): layers that update statistics in training mode (e.g.
    # non-frozen batch norm) would still see these batches — confirm this is
    # acceptable for the model in use.
    model.train()
    with torch.no_grad():
        for images, targets in val_loader:
            images, targets = _move_batch_to_device(images, targets, device)
            loss_dict = model(images, targets)
            total += sum(loss for loss in loss_dict.values()).item()
            num_batches += 1
    return total / num_batches if num_batches else None


def train_model(model, train_loader, val_loader, num_epochs=10, lr=0.005, device="cuda"):
    """Train ``model`` with SGD and report the mean loss after every epoch.

    Args:
        model: Module that, given ``(images, targets)``, returns a dict of losses.
        train_loader: Iterable of ``(images, targets)`` batches used for optimisation.
        val_loader: Iterable of ``(images, targets)`` batches used only to report
            a validation loss; may be ``None`` or empty to skip validation.
        num_epochs: Number of passes over ``train_loader``.
        lr: Initial learning rate; multiplied by 0.1 every 3 epochs.
        device: Device to train on. If a CUDA device is requested but CUDA is
            not available, training falls back to the CPU.

    Returns:
        The same ``model`` object, trained in place.
    """
    device = torch.device(device)
    if device.type == "cuda" and not torch.cuda.is_available():
        # Fall back instead of crashing on machines without a GPU.
        print("CUDA недоступна, обучение выполняется на CPU")
        device = torch.device("cpu")

    model.to(device)
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=lr, momentum=0.9, weight_decay=0.0005)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        num_batches = 0

        for images, targets in train_loader:
            images, targets = _move_batch_to_device(images, targets, device)

            # Reset gradients accumulated by the previous step.
            optimizer.zero_grad()

            # The total loss is the sum of all loss terms the model returns.
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

            # Backpropagate and update the weights.
            losses.backward()
            optimizer.step()

            train_loss += losses.item()
            num_batches += 1

        # Advance the learning-rate schedule once per epoch.
        lr_scheduler.step()

        # Batches are counted in the loop, so an empty loader reports 0
        # instead of raising ZeroDivisionError.
        print(f"Эпоха {epoch + 1}: Потеря {train_loss / max(num_batches, 1):.4f}")

        val_loss = _mean_validation_loss(model, val_loader, device)
        if val_loss is not None:
            print(f"Эпоха {epoch + 1}: Потеря на валидации {val_loss:.4f}")

    print("Обучение завершено")
    return model

In [None]:
if __name__ == "__main__":
    # Mammal classes plus one extra class for the background.
    num_classes = 3
    model = get_model(num_classes=num_classes)

    # Fine-tune the model; the result is rebound to the same name.
    model = train_model(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        num_epochs=10,
        lr=0.005,
    )

    # Persist only the weights (state dict), not the whole module object.
    torch.save(model.state_dict(), "mammals_mask_rcnn.pth")