In [2]:
import os
import json
from PIL import Image
import torch
from torchvision.transforms import ToTensor

def load_annotations(txt_file_path):
    boxes = []
    try:
        with open(txt_file_path, 'r') as f:
            for line in f:
                coords = list(map(int, line.strip().split(',')))
                if len(coords) == 4:
                    boxes.append(coords)
    except FileNotFoundError:
        print(f"Warning: Annotation file {txt_file_path} not found. Assuming no objects.")
    if not boxes:
        return {
            "boxes": torch.zeros((0, 4), dtype=torch.float32),
            "labels": torch.zeros((0,), dtype=torch.int64)
        }

    return {
        "boxes": torch.tensor(boxes, dtype=torch.float32),
        "labels": torch.tensor([1] * len(boxes), dtype=torch.int64)
    }

def prepare_data(images_dir, annotations_dir):
    dataset = []
    for file_name in os.listdir(annotations_dir):
        if file_name.endswith('.txt'):
            img_name = file_name.replace('.txt', '.jpg')
            img_path = os.path.join(images_dir, img_name)
            ann_path = os.path.join(annotations_dir, file_name)

            annotations = load_annotations(ann_path)
            dataset.append((img_path, annotations))
    return dataset


In [3]:
data = prepare_data("./spbu-dl-2024-people-detection/train/images", "./spbu-dl-2024-people-detection/train/annotations")
print(data[0])  # Пример одной записи


('./spbu-dl-2024-people-detection/train/images/1606.jpg', {'boxes': tensor([[ 12., 370., 103., 488.],
        [104., 374., 194., 496.],
        [199., 386., 292., 495.],
        [341., 394., 430., 495.],
        [191., 305., 410., 379.],
        [181., 229., 301., 358.]]), 'labels': tensor([1, 1, 1, 1, 1, 1])})


In [4]:
len(data)

3344

In [5]:
from torch.utils.data import Dataset

class PeopleDataset(Dataset):
    def __init__(self, data, transforms=None):
        self.data = data
        self.transforms = transforms

    def __getitem__(self, idx):
        img_path, annotations = self.data[idx]
        img = Image.open(img_path).convert("RGB")
        boxes = torch.tensor(annotations['boxes'], dtype=torch.float32)
        labels = torch.tensor(annotations['labels'], dtype=torch.int64)

        target = {"boxes": boxes, "labels": labels}
        if self.transforms:
            img = self.transforms(img)
        return img, target

    def __len__(self):
        return len(self.data)


In [6]:
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, FasterRCNN_ResNet50_FPN_Weights

# Загружаем модель и настраиваем под один класс (люди)
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
num_classes = 2  # фон + класс "человек"
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [7]:
from sklearn.model_selection import train_test_split

train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)

train_dataset = PeopleDataset(train_data, transforms=ToTensor())
val_dataset = PeopleDataset(val_data, transforms=ToTensor())

from torch.utils.data import DataLoader

train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))


In [8]:
import torch
import torch.optim as optim

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

optimizer = optim.Adam(model.parameters(), lr=0.005)
num_epochs = 10


In [9]:
for epoch in range(num_epochs):
    model.train()
    for imgs, targets in train_loader:
        imgs = [img.to(device) for img in imgs]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(imgs, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1}, Loss: {losses.item():.4f}")

  boxes = torch.tensor(annotations['boxes'], dtype=torch.float32)
  labels = torch.tensor(annotations['labels'], dtype=torch.int64)


Epoch 1, Loss: nan


KeyboardInterrupt: 

In [16]:
from torchvision.ops import box_iou

def evaluate(model, val_loader):
    model.eval()
    ious = []

    with torch.no_grad():
        for imgs, targets in val_loader:
            imgs = [img.to(device) for img in imgs]
            preds = model(imgs)
            for pred, target in zip(preds, targets):
                iou = box_iou(pred['boxes'].cpu(), target['boxes'])
                ious.append(iou.diagonal().mean().item())
    return sum(ious) / len(ious)


In [17]:
evaluate(model, val_loader)

  boxes = torch.tensor(annotations['boxes'], dtype=torch.float32)
  labels = torch.tensor(annotations['labels'], dtype=torch.int64)


nan