## 1. imports + environment check

In [None]:
import sys, platform
# Report the interpreter version, its executable path and the platform architecture.
for label, value in (
    ("Python:", sys.version),
    ("Executable:", sys.executable),
    ("Arch:", platform.architecture()),
):
    print(label, value)

Python: 3.12.7 | packaged by conda-forge | (main, Oct  4 2024, 15:47:54) [MSC v.1941 64 bit (AMD64)]
Executable: c:\Users\owner\anaconda3\python.exe
Arch: ('64bit', 'WindowsPE')


In [None]:
%pip install pycocotools

from PIL import Image
import torch, torchvision
from torchvision.datasets import CocoDetection
from pycocotools.coco import COCO

Note: you may need to restart the kernel to use updated packages.
Pillow OK
Torch: 2.9.1+cpu
Torchvision: 0.24.1+cpu
pycocotools OK ✅


## 2. paths + config

In [20]:
from pathlib import Path
import random
import numpy as np
import torch

# Dataset layout: ROOT holds the image folder and the annotation JSON.
ROOT = Path("signs_coco")
IMG_DIR, ANN_FILE = ROOT / "images", ROOT / "result.json"

# Seed the Python, NumPy and PyTorch global RNGs with one shared value.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Device that the model and the batches are moved to.
device = torch.device("cpu")
print("Using device:", device)


Using device: cpu


## 3. dataset + transforms

In [None]:
import torch
from torchvision.datasets import CocoDetection
import torchvision.transforms.functional as F

def transforms(img, target):
    """Convert the image with ``F.to_tensor``; the target is passed through unchanged."""
    return F.to_tensor(img), target

# Category names treated as sign attributes rather than sign types.
# CocoForSignTypeDetection drops every category whose name appears here, so
# these never become detection labels.
# NOTE(review): the rows look grouped as view / mounting / condition / shape,
# judging by the names only - confirm against the labeling config.
ATTRIBUTE_NAMES = {
    "Front", "Side", "Back",
    "Wall-mounted", "Pole-mounted",
    "Good", "Weathered", "Heavily Damaged", "Damaged",
    "Circular", "Square", "Triangular", "Octagonal",
}

from pathlib import Path
from PIL import Image

class CocoForSignTypeDetection(CocoDetection):
    """COCO detection dataset restricted to sign-type categories.

    Categories whose name is in ``ATTRIBUTE_NAMES`` are dropped. The remaining
    category ids are remapped, in ascending id order, to contiguous labels
    starting at 1, which leaves 0 free for background.

    Args:
        img_folder: Directory that contains the image files.
        ann_file: Path to the COCO-format annotation JSON.
        transforms: Optional callable ``(img, target) -> (img, target)`` applied
            at the end of ``__getitem__``.

    Attributes:
        cat_id_to_contig: Maps original COCO category id -> contiguous label.
        contig_to_name: Maps contiguous label -> category name.
        num_classes: Number of kept categories plus one (background).
    """

    def __init__(self, img_folder, ann_file, transforms=None):
        super().__init__(img_folder, ann_file)
        self._transforms = transforms
        self.img_folder = Path(img_folder)

        # Sort by id so the contiguous mapping is deterministic.
        cats = sorted(
            self.coco.loadCats(self.coco.getCatIds()),
            key=lambda c: c["id"],
        )
        sign_cats = [c for c in cats if c["name"] not in ATTRIBUTE_NAMES]

        self.cat_id_to_contig = {c["id"]: i + 1 for i, c in enumerate(sign_cats)}
        self.contig_to_name = {i + 1: c["name"] for i, c in enumerate(sign_cats)}
        self.num_classes = len(sign_cats) + 1

        # Kept for reporting which categories survived / were filtered out.
        self._kept_names = [c["name"] for c in sign_cats]
        self._dropped_names = [c["name"] for c in cats if c["name"] in ATTRIBUTE_NAMES]

    def _load_image(self, id: int):
        """Load the image with COCO image id ``id`` from ``img_folder`` as RGB."""
        # COCO json file_name may contain a Label Studio path; we only want the basename
        file_name = self.coco.loadImgs(id)[0]["file_name"]
        base = Path(file_name).name  # strips ../../label-studio/... and keeps IMG_xxx.jpg
        # convert() returns an independent image, so the file can be closed right away.
        with Image.open(self.img_folder / base) as raw:
            return raw.convert("RGB")

    def __getitem__(self, idx):
        """Return ``(img, target)`` for the sample at position ``idx``.

        ``target`` is a dict with:
            boxes: float32 tensor of shape ``[N, 4]`` in ``(x1, y1, x2, y2)`` form.
            labels: int64 tensor of shape ``[N]`` with contiguous labels (>= 1).
            image_id: tensor holding the COCO image id.

        Annotations of dropped categories and boxes with width or height
        ``<= 1`` are skipped, so ``N`` may be 0.
        """
        img_id = self.ids[idx]
        img = self._load_image(img_id)
        anns = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))

        boxes, labels = [], []
        for ann in anns:
            label = self.cat_id_to_contig.get(ann["category_id"])
            if label is None:
                continue

            x, y, w, h = ann["bbox"]
            if w <= 1 or h <= 1:
                continue
            # COCO stores (x, y, width, height); convert to corner format.
            boxes.append([x, y, x + w, y + h])
            labels.append(label)

        target = {
            # reshape keeps the [N, 4] layout even when no box survives the
            # filtering; an empty list would otherwise become shape (0,).
            "boxes": torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            "labels": torch.tensor(labels, dtype=torch.int64),
            "image_id": torch.tensor([img_id]),
        }

        if self._transforms is not None:
            img, target = self._transforms(img, target)

        return img, target

dataset = CocoForSignTypeDetection(
    img_folder=str(IMG_DIR),
    ann_file=str(ANN_FILE),
    transforms=transforms,
)

# Summarise how the category list was partitioned.
print("Dropped attribute categories:", sorted(dataset._dropped_names))
print("Number of sign-type classes:", dataset.num_classes - 1)
print("num_classes (incl background):", dataset.num_classes)

# Smoke test: scan samples in order and show the first three that contain boxes.
found = 0
for i in range(len(dataset)):
    img, tgt = dataset[i]
    if len(tgt["boxes"]) == 0:
        continue
    print("Example idx:", i, "boxes:", len(tgt["boxes"]), "labels sample:", tgt["labels"][:5].tolist())
    found += 1
    if found == 3:
        break

print("Found:", found)



loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Dropped attribute categories: ['Back', 'Circular', 'Damaged', 'Front', 'Good', 'Heavily Damaged', 'Octagonal', 'Pole-mounted', 'Side', 'Square', 'Triangular', 'Wall-mounted', 'Weathered']
Number of sign-type classes: 25
num_classes (incl background): 26
Example idx: 0 boxes: 3 labels sample: [19, 22, 6]
Example idx: 1 boxes: 2 labels sample: [22, 19]
Example idx: 2 boxes: 2 labels sample: [19, 22]
Found: 3


## 4. train/val split

In [22]:
from torch.utils.data import Subset
import random

# Shuffle all sample indices with a fixed seed, then cut them 80 % / 20 %.
random.seed(42)
indices = [*range(len(dataset))]
random.shuffle(indices)

split = int(len(indices) * 0.8)
train_ds, val_ds = (
    Subset(dataset, part) for part in (indices[:split], indices[split:])
)

print("Train:", len(train_ds), "Val:", len(val_ds))


Train: 481 Val: 121


## 5. dataloaders

In [23]:
from torch.utils.data import DataLoader

def collate_fn(batch):
    """Transpose a batch of samples into one tuple per sample field.

    A batch ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``. Nothing is stacked.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Loader settings shared by the cells below.
BATCH_SIZE = 2
NUM_WORKERS = 0

# Training loader: shuffled, BATCH_SIZE samples per batch.
train_loader = DataLoader(
    dataset=train_ds,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    collate_fn=collate_fn,
)

# Validation loader: fixed order, one sample per batch.
val_loader = DataLoader(
    dataset=val_ds,
    batch_size=1,
    shuffle=False,
    num_workers=NUM_WORKERS,
    collate_fn=collate_fn,
)


## 6. model 

In [24]:
import torch
import torchvision
from torchvision.models.detection.retinanet import RetinaNet_ResNet50_FPN_Weights

# Device that the model is moved to.
device = torch.device("cpu")
weights = RetinaNet_ResNet50_FPN_Weights.DEFAULT

# Start from the pretrained RetinaNet; only the classification head is rebuilt below.
model = torchvision.models.detection.retinanet_resnet50_fpn(weights=weights)

from torchvision.models.detection.retinanet import RetinaNetClassificationHead

# Reuse the anchor count of the pretrained head so the new head matches the
# existing anchor generator.
num_anchors = model.head.classification_head.num_anchors

# Read the feature width from the backbone's public `out_channels` attribute
# rather than indexing into the head's conv stack, whose internal layout is an
# implementation detail of torchvision.
in_channels = model.backbone.out_channels

# Fresh classification head sized for this dataset. dataset.num_classes counts
# the background slot, and dataset labels start at 1.
model.head.classification_head = RetinaNetClassificationHead(
    in_channels=in_channels,
    num_anchors=num_anchors,
    num_classes=dataset.num_classes,
)

model.to(device)
print("Model ready. Classes incl background:", dataset.num_classes)
print("Head num_classes:", model.head.classification_head.num_classes)



Model ready. Classes incl background: 26
Head num_classes: 26


In [None]:
from torch.optim import AdamW
# AdamW over all model parameters, with matching learning rate and weight decay.
optimizer = AdamW(
    params=model.parameters(),
    lr=1e-4,
    weight_decay=1e-4,
)

## training loop

In [None]:
# Smoke test: run one batch through forward, backward and a single optimizer update.
model.train()
images, targets = next(iter(train_loader))
images = [image.to(device) for image in images]
targets = [
    {key: tensor.to(device) for key, tensor in target.items()}
    for target in targets
]

optimizer.zero_grad()
loss_dict = model(images, targets)
loss = sum(loss_dict.values())
loss.backward()
optimizer.step()

# Show each loss component as a plain Python float.
print({name: float(value.detach().cpu()) for name, value in loss_dict.items()})


One-batch training step OK ✅
{'classification': 1.231806993484497, 'bbox_regression': 0.6752537488937378}


In [None]:
from tqdm import tqdm
import time

EPOCHS = 10  # number of times train_one_epoch is run by the loop below
ACCUM_STEPS = 1  # batches whose gradients are accumulated per optimizer step (1 = step every batch)

def train_one_epoch(epoch):
    """Run one training pass over ``train_loader`` and return the mean loss.

    Gradients are accumulated over ``ACCUM_STEPS`` consecutive batches before
    each optimizer update. Uses the notebook-level ``model``, ``optimizer``,
    ``train_loader`` and ``device``.

    Args:
        epoch: Epoch number; used only in the progress-bar description.

    Returns:
        The sum of the per-batch total losses divided by the number of
        batches (0.0 when the loader is empty).
    """
    model.train()
    running = 0.0
    pending = False  # True while backpropagated gradients still await an optimizer step
    optimizer.zero_grad()

    pbar = tqdm(enumerate(train_loader), total=len(train_loader))
    for step, (images, targets) in pbar:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        # Scale so the accumulated gradient averages over the window.
        loss = sum(loss_dict.values()) / ACCUM_STEPS
        loss.backward()
        pending = True

        if (step + 1) % ACCUM_STEPS == 0:
            optimizer.step()
            optimizer.zero_grad()
            pending = False

        # Undo the scaling so the reported value is the true per-batch loss.
        running += loss.item() * ACCUM_STEPS
        pbar.set_description(f"Epoch {epoch} | avg loss {running/(step+1):.4f}")

    # Flush a trailing partial accumulation window only if one exists. An
    # unconditional step here would run the optimizer again with no new
    # gradients whenever the batch count is a multiple of ACCUM_STEPS.
    if pending:
        optimizer.step()
        optimizer.zero_grad()
    return running / max(1, len(train_loader))

# Train for EPOCHS epochs and report each epoch's mean loss and wall-clock duration.
for epoch in range(1, EPOCHS + 1):
    t0 = time.time()  # epoch start, used for the elapsed time printed below
    avg_loss = train_one_epoch(epoch)
    print(f"Epoch {epoch}/{EPOCHS} | avg loss={avg_loss:.4f} | time={time.time()-t0:.1f}s")


Epoch 1 | avg loss 1.2795: 100%|██████████| 241/241 [1:09:41<00:00, 17.35s/it]


Epoch 1/10 | avg loss=1.2795 | time=4181.5s


Epoch 2 | avg loss 0.9173: 100%|██████████| 241/241 [1:08:25<00:00, 17.03s/it]


Epoch 2/10 | avg loss=0.9173 | time=4105.3s


Epoch 3 | avg loss 0.8361: 100%|██████████| 241/241 [1:07:14<00:00, 16.74s/it]


Epoch 3/10 | avg loss=0.8361 | time=4034.2s


Epoch 4 | avg loss 0.8902: 100%|██████████| 241/241 [1:07:56<00:00, 16.91s/it]


Epoch 4/10 | avg loss=0.8902 | time=4076.2s


Epoch 5 | avg loss 0.7069: 100%|██████████| 241/241 [1:07:51<00:00, 16.90s/it]


Epoch 5/10 | avg loss=0.7069 | time=4071.8s


Epoch 6 | avg loss 0.7315:  30%|██▉       | 72/241 [21:04<49:27, 17.56s/it]  


KeyboardInterrupt: 

## evaluation + qualitative predictions

## save weights + inference function

## analytics