In [2]:
import numpy as np
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
import fiftyone as fo
import fiftyone.zoo as foz
import os
from PIL import Image
import json
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
from torchvision.ops import box_iou
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [25]:
def load_ball_split(split, max_samples):
    """Load Open Images V7 samples that contain at least one "Ball" box.

    Args:
        split: Open Images split name ("train", "validation" or "test").
        max_samples: Maximum number of samples to load for the split.

    Returns:
        The FiftyOne dataset returned by ``foz.load_zoo_dataset``.
    """
    return foz.load_zoo_dataset(
        "open-images-v7",
        split=split,
        label_types=["detections"],
        classes=["Ball"],
        # Keep only the "Ball" detections. Without this flag every other
        # object labelled in the matching images is loaded too, which does not
        # fit the two-class (background + Ball) detector trained below.
        # NOTE(review): a dataset already loaded under the same name is reused
        # as-is; delete it (or pass drop_existing_dataset=True) to re-filter.
        only_matching=True,
        max_samples=max_samples,
    )


train_dataset = load_ball_split("train", max_samples=400)
val_dataset = load_ball_split("validation", max_samples=50)
test_dataset = load_ball_split("test", max_samples=50)

Downloading split 'train' to 'C:\Users\INadtochii\fiftyone\open-images-v7\train' if necessary
Downloading 'https://storage.googleapis.com/openimages/2018_04/train/train-images-boxable-with-rotation.csv' to 'C:\Users\INadtochii\fiftyone\open-images-v7\train\metadata\image_ids.csv'
 100% |██████|    4.8Gb/4.8Gb [52.9s elapsed, 0s remaining, 102.4Mb/s]     
Downloading 'https://storage.googleapis.com/openimages/v5/class-descriptions-boxable.csv' to 'C:\Users\INadtochii\fiftyone\open-images-v7\train\metadata\classes.csv'
Downloading 'https://storage.googleapis.com/openimages/2018_04/bbox_labels_600_hierarchy.json' to 'C:\Users\INADTO~1\AppData\Local\Temp\tmpv36lzvw1\metadata\hierarchy.json'
Downloading 'https://storage.googleapis.com/openimages/v6/oidv6-train-annotations-bbox.csv' to 'C:\Users\INadtochii\fiftyone\open-images-v7\train\labels\detections.csv'
Downloading 400 images
 100% |███████████████████| 400/400 [22.1s elapsed, 0s remaining, 5.0 files/s]       
Dataset info written to 'C

In [28]:
# Export every split to disk in COCO detection format.
export_dir_train = "openimages_fiftyone_train"
export_dir_val = "openimages_fiftyone_val"
export_dir_test = "openimages_fiftyone_test"

# Same export settings for all three splits; order is train, val, test.
for split_dataset, split_export_dir in (
    (train_dataset, export_dir_train),
    (val_dataset, export_dir_val),
    (test_dataset, export_dir_test),
):
    split_dataset.export(
        export_dir=split_export_dir,
        dataset_type=fo.types.COCODetectionDataset,
        label_field="ground_truth",
        overwrite=True,
    )

 100% |█████████████████| 400/400 [2.4s elapsed, 0s remaining, 175.4 samples/s]      
 100% |███████████████████| 50/50 [411.9ms elapsed, 0s remaining, 121.4 samples/s]      
 100% |███████████████████| 50/50 [446.1ms elapsed, 0s remaining, 112.1 samples/s]      


In [3]:
class COCODetectionDataset(Dataset):
    """Torch dataset over a COCO-format detection export (image folder + JSON).

    Each item is ``(image, target)``. ``image`` is a floating-point
    ``(C, H, W)`` tensor (uint8 inputs are scaled to ``[0, 1]``), which is the
    input type torchvision detection models require. ``target`` is a dict with
    ``boxes`` (``(N, 4)`` in xmin/ymin/xmax/ymax order), ``labels``,
    ``image_id``, ``area`` and ``iscrowd``.

    Args:
        root: Directory that contains the image files.
        annFile: Path to the COCO JSON annotation file.
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., class_labels=...)`` and
            returning a dict with the same three keys (Albumentations style).
    """

    def __init__(self, root, annFile, transforms=None):
        self.root = root
        self.transforms = transforms

        # Load the COCO annotations; JSON is read as UTF-8 regardless of the
        # platform's default locale encoding.
        with open(annFile, "r", encoding="utf-8") as f:
            coco_data = json.load(f)

        # Map image id -> image record.
        self.images = {img["id"]: img for img in coco_data["images"]}

        # Group annotations by image id; images without annotations get no key.
        self.annotations = {}
        for ann in coco_data["annotations"]:
            self.annotations.setdefault(ann["image_id"], []).append(ann)

        # Index -> image id lookup used by __getitem__.
        self.ids = list(self.images.keys())

    def __len__(self):
        return len(self.ids)

    @staticmethod
    def _coco_to_xyxy(bbox, width, height):
        """Convert a COCO ``[x, y, w, h]`` box to clipped ``[xmin, ymin, xmax, ymax]``.

        Coordinates are clipped to the ``width`` x ``height`` image. Returns
        ``None`` when the clipped box has no positive width or height, so the
        caller can drop it.
        """
        x, y, w, h = (float(v) for v in bbox)
        xmin = min(max(x, 0.0), float(width))
        ymin = min(max(y, 0.0), float(height))
        xmax = min(max(x + w, 0.0), float(width))
        ymax = min(max(y + h, 0.0), float(height))
        if xmax <= xmin or ymax <= ymin:
            return None
        return [xmin, ymin, xmax, ymax]

    @staticmethod
    def _image_to_float_tensor(img):
        """Return ``img`` as a floating-point ``(C, H, W)`` tensor.

        Non-tensor input is treated as an ``(H, W, C)`` array and permuted.
        Integer images are scaled by 1/255; floating-point tensors are
        returned unchanged.
        """
        if not torch.is_tensor(img):
            img = torch.from_numpy(np.ascontiguousarray(img)).permute(2, 0, 1).contiguous()
        if not img.is_floating_point():
            img = img.to(torch.float32) / 255.0
        return img

    def __getitem__(self, idx):
        img_id = self.ids[idx]
        img_info = self.images[img_id]
        annots = self.annotations.get(img_id, [])

        img_path = os.path.join(self.root, img_info["file_name"])
        # Albumentations works on numpy arrays. The context manager closes the
        # underlying file as soon as the pixels have been copied.
        with Image.open(img_path) as pil_img:
            img = np.array(pil_img.convert("RGB"))
        height, width = img.shape[:2]

        boxes = []
        labels = []
        for ann in annots:
            box = self._coco_to_xyxy(ann["bbox"], width, height)
            if box is None:
                # Degenerate after clipping: neither the augmentation pipeline
                # nor the detector accepts zero-sized boxes.
                continue
            boxes.append(box)
            # NOTE(review): labels are the raw COCO category ids. torchvision
            # reserves label 0 for background - confirm the exported ids fall
            # in 1..num_classes-1.
            labels.append(ann["category_id"])

        # Apply the (Albumentations-style) transforms when provided.
        if self.transforms is not None:
            transformed = self.transforms(
                image=img,
                bboxes=boxes,
                class_labels=labels
            )
            img = transformed["image"]
            boxes = transformed["bboxes"]
            labels = transformed["class_labels"]

        img = self._image_to_float_tensor(img)

        # reshape(-1, 4) keeps the (N, 4) layout even when no box is left.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64).reshape(-1)
        image_id = torch.tensor([img_id])

        # Box areas and crowd flags (every box is marked as non-crowd).
        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": image_id,
            "area": areas,
            "iscrowd": iscrowd
        }

        return img, target

In [4]:
# Photometric augmentations: they alter pixel values only, never the boxes.
color_augmentations = [
    A.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1, p=0.7),
    A.RandomBrightnessContrast(p=0.3),
    A.HueSaturationValue(p=0.3),
    A.Blur(blur_limit=3, p=0.1),
    A.MotionBlur(blur_limit=3, p=0.1),
    A.ToGray(p=0.05),
]

# Geometric augmentations: the bounding boxes are transformed with the image.
geometric_augmentations = [
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.2),
    A.ShiftScaleRotate(
        shift_limit=0.1,
        scale_limit=0.2,
        rotate_limit=15,
        border_mode=0,  # numeric OpenCV border flag (0 == cv2.BORDER_CONSTANT)
        p=0.7,
    ),
]

# Boxes are passed as absolute [xmin, ymin, xmax, ymax]; labels travel in the
# 'class_labels' field so they stay aligned with boxes that get dropped.
train_bbox_params = A.BboxParams(
    format='pascal_voc',
    label_fields=['class_labels'],
    min_visibility=0.3,
)
val_bbox_params = A.BboxParams(format='pascal_voc', label_fields=['class_labels'])

# Training pipeline order: colour -> resize to 480x480 -> geometry -> tensor.
train_transform = A.Compose(
    [
        *color_augmentations,
        A.Resize(480, 480),
        *geometric_augmentations,
        ToTensorV2(),
    ],
    bbox_params=train_bbox_params,
)

# Validation applies no augmentation, only the tensor conversion.
val_transform = A.Compose([ToTensorV2()], bbox_params=val_bbox_params)

# Torch datasets over the COCO exports written to disk earlier.
# NOTE: these assignments rebind train_dataset / val_dataset / test_dataset,
# which previously held the FiftyOne datasets; re-run the loading cell before
# exporting again.
train_dataset = COCODetectionDataset(
    root="openimages_fiftyone_train/data",
    annFile="openimages_fiftyone_train/labels.json",
    transforms=train_transform
)

val_dataset = COCODetectionDataset(
    root="openimages_fiftyone_val/data",
    annFile="openimages_fiftyone_val/labels.json",
    transforms=val_transform
)

# The test split uses the same augmentation-free pipeline as validation so its
# images reach the model as tensors (with transforms=None they would stay
# numpy arrays, and `img.to(device)` in the evaluation code would fail).
test_dataset = COCODetectionDataset(
    root="openimages_fiftyone_test/data",
    annFile="openimages_fiftyone_test/labels.json",
    transforms=val_transform
)

def collate_fn(batch):
    """Regroup a list of ``(image, target)`` samples into per-field tuples.

    Detection samples have differing image sizes and box counts, so the fields
    are kept as tuples instead of being stacked. An empty batch yields ``()``.
    """
    return tuple(column for column in zip(*batch))

subset_indices = list(range(len(train_dataset)//2))

# Load data in the main process. The dataset class is defined inside the
# notebook, and on Windows DataLoader workers are separate spawned processes
# that cannot import it, so iterating with num_workers > 0 stalls (the recorded
# run never got past batch 0). Raise this only after moving the dataset class
# into an importable .py module.
NUM_WORKERS = 0

train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, num_workers=NUM_WORKERS, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False, num_workers=NUM_WORKERS, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=2, shuffle=False, num_workers=NUM_WORKERS, collate_fn=collate_fn)

  original_init(self, **validated_kwargs)
  self._set_keys()


In [5]:
# === Train/Eval ===
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

def train_model(model, train_loader, val_loader, device, optimizer, num_epochs=5):
    """Train a detection model and print per-epoch train loss and validation IoU.

    The model is called the way torchvision detection models are: in train
    mode ``model(images, targets)`` returns a dict of losses, in eval mode
    ``model(images)`` returns one dict per image with a ``"boxes"`` entry.

    Args:
        model: Detection model to optimise; moved to ``device``.
        train_loader: Iterable of ``(images, targets)`` batches for training.
        val_loader: Iterable of ``(images, targets)`` batches for validation.
        device: Device on which the forward and backward passes run.
        optimizer: Optimizer already bound to the model's parameters.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        The same ``model`` instance, left in eval mode after the last epoch.
    """
    model.to(device)

    for epoch in range(num_epochs):
        print(f"\n===== Epoch {epoch+1}/{num_epochs} =====")

        # TRAIN
        model.train()
        total_loss = 0.0
        num_batches = 0
        for images, targets in tqdm(train_loader, desc=f"Train {epoch+1}"):
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            optimizer.zero_grad()
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

            losses.backward()
            optimizer.step()
            total_loss += losses.item()
            num_batches += 1

        # Guard against an empty loader instead of dividing by zero.
        avg_train_loss = total_loss / max(num_batches, 1)

        # EVAL: for every ground-truth box take the best IoU over all predicted
        # boxes of that image. Running sums are kept instead of storing every
        # box, so memory does not grow with the validation set.
        model.eval()
        iou_sum = 0.0
        num_gt_boxes = 0
        with torch.no_grad():
            for images, targets in tqdm(val_loader, desc=f"Eval {epoch+1}"):
                images = [img.to(device) for img in images]
                outputs = model(images)

                for out, tgt in zip(outputs, targets):
                    pred_boxes = out["boxes"].cpu()
                    # reshape tolerates an empty target stored with shape (0,).
                    gt_boxes = tgt["boxes"].cpu().reshape(-1, 4)
                    num_gt_boxes += gt_boxes.shape[0]
                    if gt_boxes.shape[0] == 0 or pred_boxes.shape[0] == 0:
                        # Missed ground-truth boxes contribute an IoU of 0.
                        continue
                    # box_iou gives a (num_pred, num_gt) matrix; reduce over predictions.
                    best_iou_per_gt = box_iou(pred_boxes, gt_boxes).max(dim=0).values
                    iou_sum += best_iou_per_gt.sum().item()

        print(f"Train loss: {avg_train_loss:.4f}")
        if num_gt_boxes > 0:
            print(f"Val mean best IoU: {iou_sum / num_gt_boxes:.4f}")
        else:
            print("Val mean best IoU: n/a (no ground-truth boxes)")

    return model


In [6]:
def calculate_map(model, data_loader, coco_ann_file, device):
    """Compute COCO-style bounding-box mAP for ``model`` on ``data_loader``.

    Args:
        model: Detection model whose eval-mode output is one dict per image
            with ``"boxes"`` (xmin, ymin, xmax, ymax), ``"scores"`` and
            ``"labels"``.
        data_loader: Iterable of ``(images, targets)`` batches; every target
            carries an ``"image_id"`` tensor holding the COCO image id.
        coco_ann_file: Path to the COCO ground-truth annotation JSON.
        device: Device on which inference runs.

    Returns:
        ``coco_eval.stats`` after ``summarize()`` has printed the metrics, or
        ``None`` when the model produced no detections at all.
    """
    model.eval()
    coco_gt = COCO(coco_ann_file)
    coco_results = []
    evaluated_image_ids = []

    with torch.no_grad():
        for images, targets in tqdm(data_loader, desc="mAP Evaluation"):
            images = [img.to(device) for img in images]
            outputs = model(images)

            for out, tgt in zip(outputs, targets):
                image_id = int(tgt["image_id"].item())
                evaluated_image_ids.append(image_id)

                detections = zip(
                    out["boxes"].tolist(),
                    out["scores"].tolist(),
                    out["labels"].tolist(),
                )
                for (x1, y1, x2, y2), score, label in detections:
                    # COCO results use [x, y, width, height] boxes.
                    coco_results.append({
                        "image_id": image_id,
                        "category_id": int(label),
                        "bbox": [float(x1), float(y1), float(x2 - x1), float(y2 - y1)],
                        "score": float(score)
                    })

    # loadRes cannot handle an empty result list, so stop early in that case.
    if not coco_results:
        print("No detections were produced; skipping COCO evaluation.")
        return None

    coco_dt = coco_gt.loadRes(coco_results)
    coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
    # Score only the images that were actually run through the model, so a
    # loader covering part of the annotation file is not penalised for the rest.
    coco_eval.params.imgIds = sorted(set(evaluated_image_ids))
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()  # prints mAP@[.5:.95], mAP@0.5, mAP@0.75 and AP by object size
    return coco_eval.stats

In [7]:
# Training hyperparameters.
learning_rate = 0.01
num_epochs = 5
optimizer_name = "SGD"

# Faster R-CNN (ResNet-50 FPN) with two output classes: Ball + background.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
    weights=None,
    num_classes=2,
).to(device)

# Optimizer: each supported name maps to a factory, so only the selected
# optimizer is ever constructed.
optimizer_factories = {
    "SGD": lambda params: torch.optim.SGD(params, lr=learning_rate, momentum=0.9, weight_decay=0.0005),
    "Adam": lambda params: torch.optim.Adam(params, lr=learning_rate),
    "AdamW": lambda params: torch.optim.AdamW(params, lr=learning_rate),
}
if optimizer_name not in optimizer_factories:
    raise ValueError("Unknown optimizer")
optimizer = optimizer_factories[optimizer_name](model.parameters())

In [8]:
# Sanity check: show which device was selected for training.
print(device)

cuda


In [None]:
# Run the full training loop; train_model returns the model instance it was given.
trained_model = train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    device=device,
    optimizer=optimizer,
    num_epochs=num_epochs,
)


===== Epoch 1/5 =====


Train 1:   0%|          | 0/200 [00:00<?, ?it/s]